Fix inconsistencies wrt whitespaces.

author yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 10 Feb 2014 12:43:10 +0000 (12:43 +0000)

committer yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 10 Feb 2014 12:43:10 +0000 (12:43 +0000)
author yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 10 Feb 2014 12:43:10 +0000 (12:43 +0000)
committer yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 10 Feb 2014 12:43:10 +0000 (12:43 +0000)
diff --git a/src/char-predicates.h b/src/char-predicates.h

index 767ad6513afa7e3d5995a31a1df22372bf7a9484..f52feda6c15caa89af07f7105e8cf8fcce14789b 100644 (file)
--- a/src/char-predicates.h
+++ b/src/char-predicates.h
@@ -66,6 +66,27 @@ struct IdentifierPart {
    }
  };
  
+
+// WhiteSpace according to ECMA-262 5.1, 7.2. LineTerminator is not included.
+struct WhiteSpace {
+  static inline bool Is(uc32 c) {
+    return c == 0x0009 ||  // <TAB>
+           c == 0x000B ||  // <VT>
+           c == 0x000C ||  // <FF>
+           c == 0xFEFF ||  // <BOM>
+           // \u0020 and \u00A0 are included in unibrow::WhiteSpace.
+           unibrow::WhiteSpace::Is(c);
+  }
+};
+
+
+// WhiteSpace and LineTerminator according to ECMA-262 5.1, 7.2 and 7.3.
+struct WhiteSpaceOrLineTerminator {
+  static inline bool Is(uc32 c) {
+    return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c);
+  }
+};
+
  } }  // namespace v8::internal
  
  #endif  // V8_CHAR_PREDICATES_H_
diff --git a/src/conversions-inl.h b/src/conversions-inl.h

index 3cb7ef29924ae1e7c058c65ef4a8fb5f3515d14b..e503eb5027a0456795fd6cacdec32782e8595326 100644 (file)
--- a/src/conversions-inl.h
+++ b/src/conversions-inl.h
@@ -128,7 +128,7 @@ inline bool AdvanceToNonspace(UnicodeCache* unicode_cache,
                                Iterator* current,
                                EndMark end) {
    while (*current != end) {
-    if (!unicode_cache->IsWhiteSpace(**current)) return true;
+    if (!unicode_cache->IsWhiteSpaceOrLineTerminator(**current)) return true;
      ++*current;
    }
    return false;
diff --git a/src/dateparser.h b/src/dateparser.h

index 27584ce39efd4f50a6f11b055983dd51c907486c..7dc489de341adb90eef13c10e3aa38fa66a7ef29 100644 (file)
--- a/src/dateparser.h
+++ b/src/dateparser.h
@@ -122,7 +122,7 @@ class DateParser : public AllStatic {
      }
  
      bool SkipWhiteSpace() {
-      if (unicode_cache_->IsWhiteSpace(ch_)) {
+      if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) {
          Next();
          return true;
        }
diff --git a/src/jsregexp.cc b/src/jsregexp.cc

index edd2eacd3dff7d7536ab8371dca18a0fe7b674f0..b21c8e14f319591f52a93157740615adc48bd366 100644 (file)
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -3597,9 +3597,12 @@ class AlternativeGenerationList {
  
  
  // The '2' variant has inclusive from and exclusive to.
-static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0,
-    0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A,
-    0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 };
+// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
+// which include WhiteSpace (7.2) or LineTerminator (7.3) values.
+static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
+    0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
+    0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
+    0xFEFF, 0xFF00, 0x10000 };
  static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
  
  static const int kWordRanges[] = {
diff --git a/src/runtime.cc b/src/runtime.cc

index 3596add429ee9335994c525ad946be41648d6842..b9a09c1def8d39016687e917b8405f3f39edea1d 100644 (file)
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -6541,11 +6541,6 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) {
  }
  
  
-static inline bool IsTrimWhiteSpace(unibrow::uchar c) {
-  return unibrow::WhiteSpace::Is(c) || c == 0x200b || c == 0xfeff;
-}
-
-
  RUNTIME_FUNCTION(MaybeObject*, Runtime_StringTrim) {
    HandleScope scope(isolate);
    ASSERT(args.length() == 3);
@@ -6558,15 +6553,19 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringTrim) {
    int length = string->length();
  
    int left = 0;
+  UnicodeCache* unicode_cache = isolate->unicode_cache();
    if (trimLeft) {
-    while (left < length && IsTrimWhiteSpace(string->Get(left))) {
+    while (left < length &&
+           unicode_cache->IsWhiteSpaceOrLineTerminator(string->Get(left))) {
        left++;
      }
    }
  
    int right = length;
    if (trimRight) {
-    while (right > left && IsTrimWhiteSpace(string->Get(right - 1))) {
+    while (right > left &&
+           unicode_cache->IsWhiteSpaceOrLineTerminator(
+               string->Get(right - 1))) {
        right--;
      }
    }
diff --git a/src/scanner.cc b/src/scanner.cc

index 26f840b23a5914e9e5d52c207d5f9ed904e15a9d..27768547fb7b56ab70d51b2df0aed7a093510d2e 100644 (file)
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -246,7 +246,8 @@ Token::Value Scanner::Next() {
  }
  
  
-static inline bool IsByteOrderMark(uc32 c) {
+// TODO(yangguo): check whether this is actually necessary.
+static inline bool IsLittleEndianByteOrderMark(uc32 c) {
    // The Unicode value U+FFFE is guaranteed never to be assigned as a
    // Unicode character; this implies that in a Unicode context the
    // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
@@ -254,7 +255,7 @@ static inline bool IsByteOrderMark(uc32 c) {
    // not be a U+FFFE character expressed in big-endian byte
    // order). Nevertheless, we check for it to be compatible with
    // Spidermonkey.
-  return c == 0xFEFF || c == 0xFFFE;
+  return c == 0xFFFE;
  }
  
  
@@ -262,14 +263,14 @@ bool Scanner::SkipWhiteSpace() {
    int start_position = source_pos();
  
    while (true) {
-    // We treat byte-order marks (BOMs) as whitespace for better
-    // compatibility with Spidermonkey and other JavaScript engines.
-    while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) {
-      // IsWhiteSpace() includes line terminators!
+    while (true) {
+      // Advance as long as character is a WhiteSpace or LineTerminator.
+      // Remember if the latter is the case.
        if (unicode_cache_->IsLineTerminator(c0_)) {
-        // Ignore line terminators, but remember them. This is necessary
-        // for automatic semicolon insertion.
          has_line_terminator_before_next_ = true;
+      } else if (!unicode_cache_->IsWhiteSpace(c0_) &&
+                 !IsLittleEndianByteOrderMark(c0_)) {
+        break;
        }
        Advance();
      }
diff --git a/src/scanner.h b/src/scanner.h

index 3cefc833ac3323c536ca98015fdfbee1522060d5..b08692b3aed1ebd7bc66cdd840ab2a46d4b93d56 100644 (file)
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -139,12 +139,17 @@ class UnicodeCache {
    bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
    bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
    bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
+  bool IsWhiteSpaceOrLineTerminator(unibrow::uchar c) {
+    return kIsWhiteSpaceOrLineTerminator.get(c);
+  }
  
   private:
    unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
    unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
    unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
-  unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
+  unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
+  unibrow::Predicate<WhiteSpaceOrLineTerminator, 128>
+      kIsWhiteSpaceOrLineTerminator;
    StaticResource<Utf8Decoder> utf8_decoder_;
  
    DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
diff --git a/src/unicode.cc b/src/unicode.cc

index bd32467786fcaa00617692851309b7eb74ca4995..2bef7ab20b18f5f01d07a364110d96d84b5a9cd1 100644 (file)
--- a/src/unicode.cc
+++ b/src/unicode.cc
@@ -25,7 +25,7 @@
  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  //
-// This file was generated at 2012-03-06 09:55:58.934483
+// This file was generated at 2014-02-07 15:31:16.733174
  
  #include "unicode-inl.h"
  #include <stdlib.h>
@@ -710,28 +710,6 @@ bool Letter::Is(uchar c) {
  }
  
  
-// Space:                point.category == 'Zs'
-
-static const uint16_t kSpaceTable0Size = 4;
-static const int32_t kSpaceTable0[4] = {
-  32, 160, 5760, 6158 };  // NOLINT
-static const uint16_t kSpaceTable1Size = 5;
-static const int32_t kSpaceTable1[5] = {
-  1073741824, 10, 47, 95, 4096 };  // NOLINT
-bool Space::Is(uchar c) {
-  int chunk_index = c >> 13;
-  switch (chunk_index) {
-    case 0: return LookupPredicate(kSpaceTable0,
-                                       kSpaceTable0Size,
-                                       c);
-    case 1: return LookupPredicate(kSpaceTable1,
-                                       kSpaceTable1Size,
-                                       c);
-    default: return false;
-  }
-}
-
-
  // Number:               point.category == 'Nd'
  
  static const uint16_t kNumberTable0Size = 56;
@@ -767,14 +745,14 @@ bool Number::Is(uchar c) {
  }
  
  
-// WhiteSpace:           'Ws' in point.properties
+// WhiteSpace:           point.category == 'Zs'
  
-static const uint16_t kWhiteSpaceTable0Size = 7;
-static const int32_t kWhiteSpaceTable0[7] = {
-  1073741833, 13, 32, 133, 160, 5760, 6158 };  // NOLINT
-static const uint16_t kWhiteSpaceTable1Size = 7;
-static const int32_t kWhiteSpaceTable1[7] = {
-  1073741824, 10, 1073741864, 41, 47, 95, 4096 };  // NOLINT
+static const uint16_t kWhiteSpaceTable0Size = 4;
+static const int32_t kWhiteSpaceTable0[4] = {
+  32, 160, 5760, 6158 };  // NOLINT
+static const uint16_t kWhiteSpaceTable1Size = 5;
+static const int32_t kWhiteSpaceTable1[5] = {
+  1073741824, 10, 47, 95, 4096 };  // NOLINT
  bool WhiteSpace::Is(uchar c) {
    int chunk_index = c >> 13;
    switch (chunk_index) {
@@ -1833,8 +1811,6 @@ int UnicodeData::GetByteCount() {
        + kLetterTable5Size * sizeof(int32_t)  // NOLINT
        + kLetterTable6Size * sizeof(int32_t)  // NOLINT
        + kLetterTable7Size * sizeof(int32_t)  // NOLINT
-      + kSpaceTable0Size * sizeof(int32_t)  // NOLINT
-      + kSpaceTable1Size * sizeof(int32_t)  // NOLINT
        + kNumberTable0Size * sizeof(int32_t)  // NOLINT
        + kNumberTable5Size * sizeof(int32_t)  // NOLINT
        + kNumberTable7Size * sizeof(int32_t)  // NOLINT
diff --git a/src/unicode.h b/src/unicode.h

index bb5506d38e2531b0f60fd73f8676c3ad77277ab7..65a9af58fc6ad61ecf77ddca1d835e0546058df7 100644 (file)
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -226,9 +226,6 @@ struct Lowercase {
  struct Letter {
    static bool Is(uchar c);
  };
-struct Space {
-  static bool Is(uchar c);
-};
  struct Number {
    static bool Is(uchar c);
  };
diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc

index d0193520fae9a5e225ff7d1379b4fcf61653df08..fda6ea2b8a89493b8921b4cf39ccfec47fdc0caf 100644 (file)
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@@ -444,27 +444,15 @@ static bool NotDigit(uc16 c) {
  }
  
  
-static bool IsWhiteSpace(uc16 c) {
-  switch (c) {
-    case 0x09:
-    case 0x0A:
-    case 0x0B:
-    case 0x0C:
-    case 0x0d:
-    case 0x20:
-    case 0xA0:
-    case 0x2028:
-    case 0x2029:
-    case 0xFEFF:
-      return true;
-    default:
-      return unibrow::Space::Is(c);
-  }
+static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
+  // According to ECMA-262 5.1, 15.10.2.12 the CharacterClassEscape \s includes
+  // WhiteSpace (7.2) and LineTerminator (7.3) values.
+  return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
  }
  
  
-static bool NotWhiteSpace(uc16 c) {
-  return !IsWhiteSpace(c);
+static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
+  return !IsWhiteSpaceOrLineTerminator(c);
  }
  
  
@@ -494,8 +482,8 @@ TEST(CharacterClassEscapes) {
    TestCharacterClassEscapes('.', IsRegExpNewline);
    TestCharacterClassEscapes('d', IsDigit);
    TestCharacterClassEscapes('D', NotDigit);
-  TestCharacterClassEscapes('s', IsWhiteSpace);
-  TestCharacterClassEscapes('S', NotWhiteSpace);
+  TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
+  TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
    TestCharacterClassEscapes('w', IsRegExpWord);
    TestCharacterClassEscapes('W', NotWord);
  }
diff --git a/test/mjsunit/third_party/string-trim.js b/test/mjsunit/third_party/string-trim.js

deleted file mode 100644 (file)

index 234dff6..0000000
--- a/test/mjsunit/third_party/string-trim.js
+++ /dev/null
@@ -1,107 +0,0 @@
-// Copyright (c) 2009 Apple Computer, Inc. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// 3. Neither the name of the copyright holder(s) nor the names of any
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Based on LayoutTests/fast/js/script-tests/string-trim.js
-
-// References to trim(), trimLeft() and trimRight() functions for 
-// testing Function's *.call() and *.apply() methods.
-
-var trim            = String.prototype.trim;
-var trimLeft        = String.prototype.trimLeft;
-var trimRight       = String.prototype.trimRight;
-
-var testString      = 'foo bar';
-var trimString      = '';
-var leftTrimString  = '';
-var rightTrimString = '';
-var wsString        = '';
-
-var whitespace      = [
-  {s : '\u0009', t : 'HORIZONTAL TAB'},
-  {s : '\u000A', t : 'LINE FEED OR NEW LINE'},
-  {s : '\u000B', t : 'VERTICAL TAB'},
-  {s : '\u000C', t : 'FORMFEED'},
-  {s : '\u000D', t : 'CARRIAGE RETURN'},
-  {s : '\u0020', t : 'SPACE'},
-  {s : '\u00A0', t : 'NO-BREAK SPACE'},
-  {s : '\u2000', t : 'EN QUAD'},
-  {s : '\u2001', t : 'EM QUAD'},
-  {s : '\u2002', t : 'EN SPACE'},
-  {s : '\u2003', t : 'EM SPACE'},
-  {s : '\u2004', t : 'THREE-PER-EM SPACE'},
-  {s : '\u2005', t : 'FOUR-PER-EM SPACE'},
-  {s : '\u2006', t : 'SIX-PER-EM SPACE'},
-  {s : '\u2007', t : 'FIGURE SPACE'},
-  {s : '\u2008', t : 'PUNCTUATION SPACE'},
-  {s : '\u2009', t : 'THIN SPACE'},
-  {s : '\u200A', t : 'HAIR SPACE'},
-  {s : '\u3000', t : 'IDEOGRAPHIC SPACE'},
-  {s : '\u2028', t : 'LINE SEPARATOR'},
-  {s : '\u2029', t : 'PARAGRAPH SEPARATOR'},
-  {s : '\u200B', t : 'ZERO WIDTH SPACE (category Cf)'}
-];
-
-for (var i = 0; i < whitespace.length; i++) {
-  assertEquals(whitespace[i].s.trim(), '');
-  assertEquals(whitespace[i].s.trimLeft(), '');
-  assertEquals(whitespace[i].s.trimRight(), '');
-  wsString += whitespace[i].s;
-}
-
-trimString      = wsString   + testString + wsString;
-leftTrimString  = testString + wsString;  // Trimmed from the left.
-rightTrimString = wsString   + testString;  // Trimmed from the right.
-
-assertEquals(wsString.trim(),      '');
-assertEquals(wsString.trimLeft(),  '');
-assertEquals(wsString.trimRight(), '');
-
-assertEquals(trimString.trim(),      testString);
-assertEquals(trimString.trimLeft(),  leftTrimString);
-assertEquals(trimString.trimRight(), rightTrimString);
-
-assertEquals(leftTrimString.trim(),      testString);
-assertEquals(leftTrimString.trimLeft(),  leftTrimString);
-assertEquals(leftTrimString.trimRight(), testString);
-
-assertEquals(rightTrimString.trim(),      testString);
-assertEquals(rightTrimString.trimLeft(),  testString);
-assertEquals(rightTrimString.trimRight(), rightTrimString);
-
-var testValues = [0, Infinity, NaN, true, false, ({}), ['an','array'],
-  ({toString:function(){return 'wibble'}})
-];
-
-for (var i = 0; i < testValues.length; i++) {
-  assertEquals(trim.call(testValues[i]), String(testValues[i]));
-  assertEquals(trimLeft.call(testValues[i]), String(testValues[i]));
-  assertEquals(trimRight.call(testValues[i]), String(testValues[i]));
-}
diff --git a/test/mjsunit/whitespaces.js b/test/mjsunit/whitespaces.js

new file mode 100644 (file)

index 0000000..78e4ad5
--- /dev/null
+++ b/test/mjsunit/whitespaces.js
@@ -0,0 +1,115 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+var whitespaces = [
+  // WhiteSpace defined in ECMA-262 5.1, 7.2
+  0x0009,  // Tab                  TAB
+  0x000B,  // Vertical Tab         VT
+  0x000C,  // Form Feed            FF
+  0x0020,  // Space                SP
+  0x00A0,  // No-break space       NBSP
+  0xFEFF,  // Byte Order Mark      BOM
+
+  // LineTerminator defined in ECMA-262 5.1, 7.3
+  0x000A,  // Line Feed            LF
+  0x000D,  // Carriage Return      CR
+  0x2028,  // Line Separator       LS
+  0x2029,  // Paragraph Separator  PS
+
+  // Unicode 6.3.0 whitespaces (category 'Zs')
+  0x1680,  // Ogham Space Mark
+  0x180E,  // Mongolian Vowel Separator
+  0x2000,  // EN QUAD
+  0x2001,  // EM QUAD
+  0x2002,  // EN SPACE
+  0x2003,  // EM SPACE
+  0x2004,  // THREE-PER-EM SPACE
+  0x2005,  // FOUR-PER-EM SPACE
+  0x2006,  // SIX-PER-EM SPACE
+  0x2007,  // FIGURE SPACE
+  0x2008,  // PUNCTUATION SPACE
+  0x2009,  // THIN SPACE
+  0x200A,  // HAIR SPACE
+  0x2028,  // LINE SEPARATOR (duplicate of LS above, not a 'Zs' entry)
+  0x2029,  // PARAGRAPH SEPARATOR (duplicate of PS above, not a 'Zs' entry)
+  0x202F,  // NARROW NO-BREAK SPACE
+  0x205F,  // MEDIUM MATHEMATICAL SPACE
+  0x3000,  // IDEOGRAPHIC SPACE
+];
+
+// Add single twobyte char to force twobyte representation.
+// Interestingly, snowman is not "white" space :)
+var twobyte = "\u2603";
+var onebyte = "\u007E";
+var twobytespace = "\u2000";
+var onebytespace = "\u0020";
+
+function is_whitespace(c) {
+  return whitespaces.indexOf(c.charCodeAt(0)) > -1;
+}
+
+function test_regexp(str) {
+  var pos_match = str.match(/\s/);
+  var neg_match = str.match(/\S/);
+  var test_char = str[0];
+  var postfix = str[1];
+  if (is_whitespace(test_char)) {
+    assertEquals(test_char, pos_match[0]);
+    assertEquals(postfix, neg_match[0]);
+  } else {
+    assertEquals(test_char, neg_match[0]);
+    assertNull(pos_match);
+  }
+}
+
+function test_trim(c, infix) {
+  var str = c + c + c + infix + c;
+  if (is_whitespace(c)) {
+    assertEquals(infix, str.trim());
+  } else {
+    assertEquals(str, str.trim());
+  }
+}
+
+function test_parseInt(c, postfix) {
+  // Skip if prefix is a digit.
+  if (c >= "0" && c <= "9") return;
+  var str = c + c + "123" + postfix;
+  if (is_whitespace(c)) {
+    assertEquals(123, parseInt(str));
+  } else {
+    assertEquals(NaN, parseInt(str));
+  }
+}
+
+function test_eval(c, content) {
+  if (!is_whitespace(c)) return;
+  var str = c + c + "'" + content + "'" + c + c;
+  assertEquals(content, eval(str));
+}
+
+function test_stringtonumber(c, postfix) {
+  // Skip if prefix is a digit.
+  if (c >= "0" && c <= "9") return;
+  var result = 1 + Number(c + "123" + c + postfix);
+  if (is_whitespace(c)) {
+    assertEquals(124, result);
+  } else {
+    assertEquals(NaN, result);
+  }
+}
+
+for (var i = 0; i < 0x10000; i++) {
+  c = String.fromCharCode(i);
+  test_regexp(c + onebyte);
+  test_regexp(c + twobyte);
+  test_trim(c, onebyte + "trim");
+  test_trim(c, twobyte + "trim");
+  test_parseInt(c, onebyte);
+  test_parseInt(c, twobyte);
+  test_eval(c, onebyte);
+  test_eval(c, twobyte);
+  test_stringtonumber(c, onebytespace);
+  test_stringtonumber(c, twobytespace);
+}
diff --git a/test/webkit/string-trim-expected.txt b/test/webkit/string-trim-expected.txt

index 9540f1c8dbd09c3a65178756180939d733e0775c..6472f89d0b4812687e386f85c6d88d2d4a9ad58a 100644 (file)
--- a/test/webkit/string-trim-expected.txt
+++ b/test/webkit/string-trim-expected.txt
@@ -89,20 +89,38 @@ PASS whitespace[19].s.trimRight() is ''
  PASS whitespace[20].s.trim() is ''
  PASS whitespace[20].s.trimLeft() is ''
  PASS whitespace[20].s.trimRight() is ''
-PASS whitespace[21].s.trim() is ''
-PASS whitespace[21].s.trimLeft() is ''
-PASS whitespace[21].s.trimRight() is ''
-PASS wsString.trim() is ''
-PASS wsString.trimLeft() is ''
-PASS wsString.trimRight() is ''
-PASS trimString.trim() is testString
-PASS trimString.trimLeft() is leftTrimString
-PASS trimString.trimRight() is rightTrimString
-PASS leftTrimString.trim() is testString
+FAIL whitespace[21].s.trim() should be . Was .
+FAIL whitespace[21].s.trimLeft() should be . Was .
+FAIL whitespace[21].s.trimRight() should be . Was .
+FAIL wsString.trim() should be . Was .
+FAIL wsString.trimLeft() should be . Was .
+FAIL wsString.trimRight() should be . Was   
+\v\f
+             　  .
+FAIL trimString.trim() should be foo bar. Was foo bar  
+\v\f
+             　  .
+FAIL trimString.trimLeft() should be foo bar  
+\v\f
+             　  . Was foo bar 
+\v\f
+             　  .
+FAIL trimString.trimRight() should be   
+\v\f
+             　  foo bar. Was   
+\v\f
+             　  foo bar  
+\v\f
+             　  .
+FAIL leftTrimString.trim() should be foo bar. Was foo bar 
+\v\f
+             　  .
  PASS leftTrimString.trimLeft() is leftTrimString
-PASS leftTrimString.trimRight() is testString
-PASS rightTrimString.trim() is testString
-PASS rightTrimString.trimLeft() is testString
+FAIL leftTrimString.trimRight() should be foo bar. Was foo bar  
+\v\f
+             　  .
+FAIL rightTrimString.trim() should be foo bar. Was foo bar.
+FAIL rightTrimString.trimLeft() should be foo bar. Was foo bar.
  PASS rightTrimString.trimRight() is rightTrimString
  PASS trim.call(0) is '0'
  PASS trimLeft.call(0) is '0'
author	yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 10 Feb 2014 12:43:10 +0000 (12:43 +0000)
committer	yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 10 Feb 2014 12:43:10 +0000 (12:43 +0000)
src/char-predicates.h		patch \| blob \| history
src/conversions-inl.h		patch \| blob \| history
src/dateparser.h		patch \| blob \| history
src/jsregexp.cc		patch \| blob \| history
src/runtime.cc		patch \| blob \| history
src/scanner.cc		patch \| blob \| history
src/scanner.h		patch \| blob \| history
src/unicode.cc		patch \| blob \| history
src/unicode.h		patch \| blob \| history
test/cctest/test-regexp.cc		patch \| blob \| history
test/mjsunit/third_party/string-trim.js	[deleted file]	patch \| blob \| history
test/mjsunit/whitespaces.js	[new file with mode: 0644]	patch \| blob
test/webkit/string-trim-expected.txt		patch \| blob \| history