}
};
+
+// WhiteSpace according to ECMA-262 5.1, 7.2.
+struct WhiteSpace {
+  // Returns true for the WhiteSpace code points that are handled explicitly
+  // here, and otherwise defers to unibrow::WhiteSpace.
+  static inline bool Is(uc32 c) {
+    switch (c) {
+      case 0x0009:  // <TAB>
+      case 0x000B:  // <VT>
+      case 0x000C:  // <FF>
+      case 0xFEFF:  // <BOM>
+        return true;
+      default:
+        // \u0020 and \u00A0 are included in unibrow::WhiteSpace.
+        return unibrow::WhiteSpace::Is(c);
+    }
+  }
+};
+
+
+// WhiteSpace and LineTerminator according to ECMA-262 5.1, 7.2 and 7.3.
+struct WhiteSpaceOrLineTerminator {
+  // Returns true when c satisfies WhiteSpace::Is or
+  // unibrow::LineTerminator::Is.
+  static inline bool Is(uc32 c) {
+    if (WhiteSpace::Is(c)) return true;
+    return unibrow::LineTerminator::Is(c);
+  }
+};
+
} } // namespace v8::internal
#endif // V8_CHAR_PREDICATES_H_
Iterator* current,
EndMark end) {
while (*current != end) {
- if (!unicode_cache->IsWhiteSpace(**current)) return true;
+ if (!unicode_cache->IsWhiteSpaceOrLineTerminator(**current)) return true;
++*current;
}
return false;
}
bool SkipWhiteSpace() {
- if (unicode_cache_->IsWhiteSpace(ch_)) {
+ if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) {
Next();
return true;
}
// The '2' variant is has inclusive from and exclusive to.
-static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0,
- 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A,
- 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 };
+// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
+// which includes both WhiteSpace (7.2) and LineTerminator (7.3) values.
+static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
+ 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
+ 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
+ 0xFEFF, 0xFF00, 0x10000 };
static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
static const int kWordRanges[] = {
}
-static inline bool IsTrimWhiteSpace(unibrow::uchar c) {
- return unibrow::WhiteSpace::Is(c) || c == 0x200b || c == 0xfeff;
-}
-
-
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringTrim) {
HandleScope scope(isolate);
ASSERT(args.length() == 3);
int length = string->length();
int left = 0;
+ UnicodeCache* unicode_cache = isolate->unicode_cache();
if (trimLeft) {
- while (left < length && IsTrimWhiteSpace(string->Get(left))) {
+ while (left < length &&
+ unicode_cache->IsWhiteSpaceOrLineTerminator(string->Get(left))) {
left++;
}
}
int right = length;
if (trimRight) {
- while (right > left && IsTrimWhiteSpace(string->Get(right - 1))) {
+ while (right > left &&
+ unicode_cache->IsWhiteSpaceOrLineTerminator(
+ string->Get(right - 1))) {
right--;
}
}
}
-static inline bool IsByteOrderMark(uc32 c) {
+// TODO(yangguo): check whether this is actually necessary.
+static inline bool IsLittleEndianByteOrderMark(uc32 c) {
// The Unicode value U+FFFE is guaranteed never to be assigned as a
// Unicode character; this implies that in a Unicode context the
// 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
// not be a U+FFFE character expressed in big-endian byte
// order). Nevertheless, we check for it to be compatible with
// Spidermonkey.
- return c == 0xFEFF || c == 0xFFFE;
+ return c == 0xFFFE;
}
int start_position = source_pos();
while (true) {
- // We treat byte-order marks (BOMs) as whitespace for better
- // compatibility with Spidermonkey and other JavaScript engines.
- while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) {
- // IsWhiteSpace() includes line terminators!
+ while (true) {
+ // Advance as long as character is a WhiteSpace or LineTerminator.
+ // Remember if the latter is the case.
if (unicode_cache_->IsLineTerminator(c0_)) {
- // Ignore line terminators, but remember them. This is necessary
- // for automatic semicolon insertion.
has_line_terminator_before_next_ = true;
+ } else if (!unicode_cache_->IsWhiteSpace(c0_) &&
+ !IsLittleEndianByteOrderMark(c0_)) {
+ break;
}
Advance();
}
bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
+ // Returns whether c is a WhiteSpace or LineTerminator character, as
+ // answered by the kIsWhiteSpaceOrLineTerminator predicate.
+ bool IsWhiteSpaceOrLineTerminator(unibrow::uchar c) {
+ return kIsWhiteSpaceOrLineTerminator.get(c);
+ }
private:
unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
- unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
+ unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
+ unibrow::Predicate<WhiteSpaceOrLineTerminator, 128>
+ kIsWhiteSpaceOrLineTerminator;
StaticResource<Utf8Decoder> utf8_decoder_;
DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
-// This file was generated at 2012-03-06 09:55:58.934483
+// This file was generated at 2014-02-07 15:31:16.733174
#include "unicode-inl.h"
#include <stdlib.h>
}
-// Space: point.category == 'Zs'
-
-static const uint16_t kSpaceTable0Size = 4;
-static const int32_t kSpaceTable0[4] = {
- 32, 160, 5760, 6158 }; // NOLINT
-static const uint16_t kSpaceTable1Size = 5;
-static const int32_t kSpaceTable1[5] = {
- 1073741824, 10, 47, 95, 4096 }; // NOLINT
-bool Space::Is(uchar c) {
- int chunk_index = c >> 13;
- switch (chunk_index) {
- case 0: return LookupPredicate(kSpaceTable0,
- kSpaceTable0Size,
- c);
- case 1: return LookupPredicate(kSpaceTable1,
- kSpaceTable1Size,
- c);
- default: return false;
- }
-}
-
-
// Number: point.category == 'Nd'
static const uint16_t kNumberTable0Size = 56;
}
-// WhiteSpace: 'Ws' in point.properties
+// WhiteSpace: point.category == 'Zs'
-static const uint16_t kWhiteSpaceTable0Size = 7;
-static const int32_t kWhiteSpaceTable0[7] = {
- 1073741833, 13, 32, 133, 160, 5760, 6158 }; // NOLINT
-static const uint16_t kWhiteSpaceTable1Size = 7;
-static const int32_t kWhiteSpaceTable1[7] = {
- 1073741824, 10, 1073741864, 41, 47, 95, 4096 }; // NOLINT
+static const uint16_t kWhiteSpaceTable0Size = 4;
+static const int32_t kWhiteSpaceTable0[4] = {
+ 32, 160, 5760, 6158 }; // NOLINT
+static const uint16_t kWhiteSpaceTable1Size = 5;
+static const int32_t kWhiteSpaceTable1[5] = {
+ 1073741824, 10, 47, 95, 4096 }; // NOLINT
bool WhiteSpace::Is(uchar c) {
int chunk_index = c >> 13;
switch (chunk_index) {
+ kLetterTable5Size * sizeof(int32_t) // NOLINT
+ kLetterTable6Size * sizeof(int32_t) // NOLINT
+ kLetterTable7Size * sizeof(int32_t) // NOLINT
- + kSpaceTable0Size * sizeof(int32_t) // NOLINT
- + kSpaceTable1Size * sizeof(int32_t) // NOLINT
+ kNumberTable0Size * sizeof(int32_t) // NOLINT
+ kNumberTable5Size * sizeof(int32_t) // NOLINT
+ kNumberTable7Size * sizeof(int32_t) // NOLINT
struct Letter {
static bool Is(uchar c);
};
-struct Space {
- static bool Is(uchar c);
-};
struct Number {
static bool Is(uchar c);
};
}
-static bool IsWhiteSpace(uc16 c) {
- switch (c) {
- case 0x09:
- case 0x0A:
- case 0x0B:
- case 0x0C:
- case 0x0d:
- case 0x20:
- case 0xA0:
- case 0x2028:
- case 0x2029:
- case 0xFEFF:
- return true;
- default:
- return unibrow::Space::Is(c);
- }
+static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
+ // Reference predicate for the \s character class escape.
+ // According to ECMA-262 5.1, 15.10.2.12 the CharacterClassEscape \s includes
+ // WhiteSpace (7.2) and LineTerminator (7.3) values.
+ return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
}
-static bool NotWhiteSpace(uc16 c) {
- return !IsWhiteSpace(c);
+// Complement of IsWhiteSpaceOrLineTerminator; reference predicate for \S.
+static bool NotWhiteSpaceNorLineTerminator(uc16 c) {
+ return !IsWhiteSpaceOrLineTerminator(c);
}
TestCharacterClassEscapes('.', IsRegExpNewline);
TestCharacterClassEscapes('d', IsDigit);
TestCharacterClassEscapes('D', NotDigit);
- TestCharacterClassEscapes('s', IsWhiteSpace);
- TestCharacterClassEscapes('S', NotWhiteSpace);
+ TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
+ TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTerminator);
TestCharacterClassEscapes('w', IsRegExpWord);
TestCharacterClassEscapes('W', NotWord);
}
+++ /dev/null
-// Copyright (c) 2009 Apple Computer, Inc. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// 3. Neither the name of the copyright holder(s) nor the names of any
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Based on LayoutTests/fast/js/script-tests/string-trim.js
-
-// References to trim(), trimLeft() and trimRight() functions for
-// testing Function's *.call() and *.apply() methods.
-
-var trim = String.prototype.trim;
-var trimLeft = String.prototype.trimLeft;
-var trimRight = String.prototype.trimRight;
-
-var testString = 'foo bar';
-var trimString = '';
-var leftTrimString = '';
-var rightTrimString = '';
-var wsString = '';
-
-var whitespace = [
- {s : '\u0009', t : 'HORIZONTAL TAB'},
- {s : '\u000A', t : 'LINE FEED OR NEW LINE'},
- {s : '\u000B', t : 'VERTICAL TAB'},
- {s : '\u000C', t : 'FORMFEED'},
- {s : '\u000D', t : 'CARRIAGE RETURN'},
- {s : '\u0020', t : 'SPACE'},
- {s : '\u00A0', t : 'NO-BREAK SPACE'},
- {s : '\u2000', t : 'EN QUAD'},
- {s : '\u2001', t : 'EM QUAD'},
- {s : '\u2002', t : 'EN SPACE'},
- {s : '\u2003', t : 'EM SPACE'},
- {s : '\u2004', t : 'THREE-PER-EM SPACE'},
- {s : '\u2005', t : 'FOUR-PER-EM SPACE'},
- {s : '\u2006', t : 'SIX-PER-EM SPACE'},
- {s : '\u2007', t : 'FIGURE SPACE'},
- {s : '\u2008', t : 'PUNCTUATION SPACE'},
- {s : '\u2009', t : 'THIN SPACE'},
- {s : '\u200A', t : 'HAIR SPACE'},
- {s : '\u3000', t : 'IDEOGRAPHIC SPACE'},
- {s : '\u2028', t : 'LINE SEPARATOR'},
- {s : '\u2029', t : 'PARAGRAPH SEPARATOR'},
- {s : '\u200B', t : 'ZERO WIDTH SPACE (category Cf)'}
-];
-
-for (var i = 0; i < whitespace.length; i++) {
- assertEquals(whitespace[i].s.trim(), '');
- assertEquals(whitespace[i].s.trimLeft(), '');
- assertEquals(whitespace[i].s.trimRight(), '');
- wsString += whitespace[i].s;
-}
-
-trimString = wsString + testString + wsString;
-leftTrimString = testString + wsString; // Trimmed from the left.
-rightTrimString = wsString + testString; // Trimmed from the right.
-
-assertEquals(wsString.trim(), '');
-assertEquals(wsString.trimLeft(), '');
-assertEquals(wsString.trimRight(), '');
-
-assertEquals(trimString.trim(), testString);
-assertEquals(trimString.trimLeft(), leftTrimString);
-assertEquals(trimString.trimRight(), rightTrimString);
-
-assertEquals(leftTrimString.trim(), testString);
-assertEquals(leftTrimString.trimLeft(), leftTrimString);
-assertEquals(leftTrimString.trimRight(), testString);
-
-assertEquals(rightTrimString.trim(), testString);
-assertEquals(rightTrimString.trimLeft(), testString);
-assertEquals(rightTrimString.trimRight(), rightTrimString);
-
-var testValues = [0, Infinity, NaN, true, false, ({}), ['an','array'],
- ({toString:function(){return 'wibble'}})
-];
-
-for (var i = 0; i < testValues.length; i++) {
- assertEquals(trim.call(testValues[i]), String(testValues[i]));
- assertEquals(trimLeft.call(testValues[i]), String(testValues[i]));
- assertEquals(trimRight.call(testValues[i]), String(testValues[i]));
-}
--- /dev/null
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Code units that the tests below expect to be treated as white space.
+var whitespaces = [
+  // WhiteSpace defined in ECMA-262 5.1, 7.2
+  0x0009,  // Tab                  TAB
+  0x000B,  // Vertical Tab         VT
+  0x000C,  // Form Feed            FF
+  0x0020,  // Space                SP
+  0x00A0,  // No-break space       NBSP
+  0xFEFF,  // Byte Order Mark      BOM
+
+  // LineTerminator defined in ECMA-262 5.1, 7.3
+  0x000A,  // Line Feed            LF
+  0x000D,  // Carriage Return      CR
+  0x2028,  // Line Separator       LS
+  0x2029,  // Paragraph Separator  PS
+
+  // Remaining Unicode space separators (category 'Zs'). U+0020 and U+00A0
+  // are 'Zs' as well and are already listed above. U+2028 (category 'Zl')
+  // and U+2029 (category 'Zp') are not 'Zs'; they appear once, in the
+  // LineTerminator group.
+  0x1680,  // OGHAM SPACE MARK
+  // NOTE(review): U+180E appears to have been moved from 'Zs' to 'Cf' in
+  // Unicode 6.3.0 -- confirm against the Unicode version used to generate
+  // V8's tables. It stays listed so the expectations are unchanged.
+  0x180E,  // MONGOLIAN VOWEL SEPARATOR
+  0x2000,  // EN QUAD
+  0x2001,  // EM QUAD
+  0x2002,  // EN SPACE
+  0x2003,  // EM SPACE
+  0x2004,  // THREE-PER-EM SPACE
+  0x2005,  // FOUR-PER-EM SPACE
+  0x2006,  // SIX-PER-EM SPACE
+  0x2007,  // FIGURE SPACE
+  0x2008,  // PUNCTUATION SPACE
+  0x2009,  // THIN SPACE
+  0x200A,  // HAIR SPACE
+  0x202F,  // NARROW NO-BREAK SPACE
+  0x205F,  // MEDIUM MATHEMATICAL SPACE
+  0x3000,  // IDEOGRAPHIC SPACE
+];
+
+// Add single twobyte char to force twobyte representation.
+// Interestingly, snowman is not "white" space :)
+var twobyte = "\u2603";
+var onebyte = "\u007E";
+var twobytespace = "\u2000";
+var onebytespace = "\u0020";
+
+// Returns true when the first code unit of c is listed in whitespaces.
+function is_whitespace(c) {
+  var code = c.charCodeAt(0);
+  return whitespaces.indexOf(code) !== -1;
+}
+
+// Checks \s and \S against a two-character string: the character under test
+// followed by a non-whitespace companion character.
+function test_regexp(str) {
+  var head = str[0];
+  var tail = str[1];
+  var space_hit = str.match(/\s/);
+  var nonspace_hit = str.match(/\S/);
+  if (!is_whitespace(head)) {
+    // A non-whitespace head is the first \S match, and \s finds nothing.
+    assertEquals(head, nonspace_hit[0]);
+    assertNull(space_hit);
+    return;
+  }
+  // A whitespace head is the first \s match; \S then lands on the companion.
+  assertEquals(head, space_hit[0]);
+  assertEquals(tail, nonspace_hit[0]);
+}
+
+// Surrounds infix with copies of c and checks that trim() strips them exactly
+// when c is expected to be white space.
+function test_trim(c, infix) {
+  var padded = c + c + c + infix + c;
+  var expected = is_whitespace(c) ? infix : padded;
+  assertEquals(expected, padded.trim());
+}
+
+// Checks that parseInt skips leading copies of c only when c is expected to
+// be white space.
+function test_parseInt(c, postfix) {
+  // Skip if prefix is a digit.
+  var is_digit = c >= "0" && c <= "9";
+  if (is_digit) return;
+  var input = c + c + "123" + postfix;
+  var expected = is_whitespace(c) ? 123 : NaN;
+  assertEquals(expected, parseInt(input));
+}
+
+// For whitespace characters only: a quoted string literal wrapped in copies
+// of c must still evaluate to its content.
+function test_eval(c, content) {
+  if (is_whitespace(c)) {
+    var source = c + c + "'" + content + "'" + c + c;
+    assertEquals(content, eval(source));
+  }
+}
+
+// Checks string-to-number conversion of "123" wrapped in copies of c and
+// followed by postfix: 123 when c is expected to be white space, else NaN.
+function test_stringtonumber(c, postfix) {
+  // Skip if prefix is a digit.
+  var is_digit = c >= "0" && c <= "9";
+  if (is_digit) return;
+  var sum = 1 + Number(c + "123" + c + postfix);
+  var expected = is_whitespace(c) ? 124 : NaN;
+  assertEquals(expected, sum);
+}
+
+// Run every check for each UTF-16 code unit, paired with both the onebyte and
+// the twobyte helper strings.
+for (var i = 0; i < 0x10000; i++) {
+  // Declared with var so the loop does not create an implicit global.
+  var c = String.fromCharCode(i);
+  test_regexp(c + onebyte);
+  test_regexp(c + twobyte);
+  test_trim(c, onebyte + "trim");
+  test_trim(c, twobyte + "trim");
+  test_parseInt(c, onebyte);
+  test_parseInt(c, twobyte);
+  test_eval(c, onebyte);
+  test_eval(c, twobyte);
+  test_stringtonumber(c, onebytespace);
+  test_stringtonumber(c, twobytespace);
+}
PASS whitespace[20].s.trim() is ''
PASS whitespace[20].s.trimLeft() is ''
PASS whitespace[20].s.trimRight() is ''
-PASS whitespace[21].s.trim() is ''
-PASS whitespace[21].s.trimLeft() is ''
-PASS whitespace[21].s.trimRight() is ''
-PASS wsString.trim() is ''
-PASS wsString.trimLeft() is ''
-PASS wsString.trimRight() is ''
-PASS trimString.trim() is testString
-PASS trimString.trimLeft() is leftTrimString
-PASS trimString.trimRight() is rightTrimString
-PASS leftTrimString.trim() is testString
+FAIL whitespace[21].s.trim() should be . Was .
+FAIL whitespace[21].s.trimLeft() should be . Was .
+FAIL whitespace[21].s.trimRight() should be . Was .
+FAIL wsString.trim() should be . Was .
+FAIL wsString.trimLeft() should be . Was .
+FAIL wsString.trimRight() should be . Was
+\v\f
+
.
+FAIL trimString.trim() should be foo bar. Was foo bar
+\v\f
+
.
+FAIL trimString.trimLeft() should be foo bar
+\v\f
+
. Was foo bar
+\v\f
+
.
+FAIL trimString.trimRight() should be
+\v\f
+
foo bar. Was
+\v\f
+
foo bar
+\v\f
+
.
+FAIL leftTrimString.trim() should be foo bar. Was foo bar
+\v\f
+
.
PASS leftTrimString.trimLeft() is leftTrimString
-PASS leftTrimString.trimRight() is testString
-PASS rightTrimString.trim() is testString
-PASS rightTrimString.trimLeft() is testString
+FAIL leftTrimString.trimRight() should be foo bar. Was foo bar
+\v\f
+
.
+FAIL rightTrimString.trim() should be foo bar. Was foo bar.
+FAIL rightTrimString.trimLeft() should be foo bar. Was foo bar.
PASS rightTrimString.trimRight() is rightTrimString
PASS trim.call(0) is '0'
PASS trimLeft.call(0) is '0'