From: yangguo@chromium.org Date: Fri, 7 Feb 2014 14:13:00 +0000 (+0000) Subject: Revert "Fix inconsistencies wrt whitespaces." X-Git-Tag: upstream/4.7.83~10815 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=db1a685b8f250b99ffda15346e56efd501166193;p=platform%2Fupstream%2Fv8.git Revert "Fix inconsistencies wrt whitespaces." This reverts r19196. TBR=mstarzinger@chromium.org Review URL: https://codereview.chromium.org/147443008 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19199 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/arm/regexp-macro-assembler-arm.cc b/src/arm/regexp-macro-assembler-arm.cc index 2317064..cbc34e1 100644 --- a/src/arm/regexp-macro-assembler-arm.cc +++ b/src/arm/regexp-macro-assembler-arm.cc @@ -497,8 +497,6 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type, __ b(ls, &success); // \u00a0 (NBSP). __ cmp(r0, Operand(0x00a0 - '\t')); - // \u0085 (NEL). - __ cmp(r0, Operand(0x0085 - '\t'), ne); BranchOrBacktrack(ne, on_no_match); __ bind(&success); return true; diff --git a/src/char-predicates.h b/src/char-predicates.h index 7436931..767ad65 100644 --- a/src/char-predicates.h +++ b/src/char-predicates.h @@ -66,14 +66,6 @@ struct IdentifierPart { } }; - -struct WhiteSpace { - static inline bool Is(uc32 c) { - return unibrow::WhiteSpace::Is(c) || - c == 0xFEFF; // BYTE ORDER MARK is a white space in ECMA-262 5.1, 7.2. - } -}; - } } // namespace v8::internal #endif // V8_CHAR_PREDICATES_H_ diff --git a/src/ia32/regexp-macro-assembler-ia32.cc b/src/ia32/regexp-macro-assembler-ia32.cc index 2e82c0e..d371c45 100644 --- a/src/ia32/regexp-macro-assembler-ia32.cc +++ b/src/ia32/regexp-macro-assembler-ia32.cc @@ -526,9 +526,6 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type, __ j(below_equal, &success, Label::kNear); // \u00a0 (NBSP). __ cmp(eax, 0x00a0 - '\t'); - __ j(equal, &success, Label::kNear); - // \u0085 (NEL). 
- __ cmp(eax, 0x0085 - '\t'); BranchOrBacktrack(not_equal, on_no_match); __ bind(&success); return true; diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 3d7ccf2..edd2eac 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -3597,10 +3597,9 @@ class AlternativeGenerationList { // The '2' variant is has inclusive from and exclusive to. -static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, - 0x0085, 0x0086, 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, - 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, - 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 }; +static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, + 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, + 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 }; static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); static const int kWordRanges[] = { diff --git a/src/runtime.cc b/src/runtime.cc index 05de32d..3596add 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -6105,10 +6105,8 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToNumber) { // Fast check for a junk value. A valid string may start from a // whitespace, a sign ('+' or '-'), the decimal point, a decimal digit or // the 'I' character ('Infinity'). All of that have codes not greater than - // '9' except 'I', NBSP and NEL. - if (data[start_pos] != 'I' && - data[start_pos] != 0xa0 && - data[start_pos] != 0x85) { + // '9' except 'I' and &nbsp;. 
+ if (data[start_pos] != 'I' && data[start_pos] != 0xa0) { return isolate->heap()->nan_value(); } } else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) { @@ -6543,6 +6541,11 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) { } +static inline bool IsTrimWhiteSpace(unibrow::uchar c) { + return unibrow::WhiteSpace::Is(c) || c == 0x200b || c == 0xfeff; +} + + RUNTIME_FUNCTION(MaybeObject*, Runtime_StringTrim) { HandleScope scope(isolate); ASSERT(args.length() == 3); @@ -6555,17 +6558,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringTrim) { int length = string->length(); int left = 0; - UnicodeCache* unicode_cache = isolate->unicode_cache(); if (trimLeft) { - while (left < length && unicode_cache->IsWhiteSpace(string->Get(left))) { + while (left < length && IsTrimWhiteSpace(string->Get(left))) { left++; } } int right = length; if (trimRight) { - while (right > left && - unicode_cache->IsWhiteSpace(string->Get(right - 1))) { + while (right > left && IsTrimWhiteSpace(string->Get(right - 1))) { right--; } } diff --git a/src/scanner.h b/src/scanner.h index be84027..3cefc83 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -144,7 +144,7 @@ class UnicodeCache { unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; - unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace; + unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; StaticResource<Utf8Decoder> utf8_decoder_; DISALLOW_COPY_AND_ASSIGN(UnicodeCache); diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc index 068991c..75e70c5 100644 --- a/src/x64/regexp-macro-assembler-x64.cc +++ b/src/x64/regexp-macro-assembler-x64.cc @@ -552,9 +552,6 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, __ j(below_equal, &success, Label::kNear); // \u00a0 (NBSP). __ cmpl(rax, Immediate(0x00a0 - '\t')); - __ j(equal, &success, Label::kNear); - // \u0085 (NEL). 
- __ cmpl(rax, Immediate(0x0085 - '\t')); BranchOrBacktrack(not_equal, on_no_match); __ bind(&success); return true; diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc index 78cc88e..d019352 100644 --- a/test/cctest/test-regexp.cc +++ b/test/cctest/test-regexp.cc @@ -445,7 +445,21 @@ static bool NotDigit(uc16 c) { static bool IsWhiteSpace(uc16 c) { - return v8::internal::WhiteSpace::Is(c); + switch (c) { + case 0x09: + case 0x0A: + case 0x0B: + case 0x0C: + case 0x0d: + case 0x20: + case 0xA0: + case 0x2028: + case 0x2029: + case 0xFEFF: + return true; + default: + return unibrow::Space::Is(c); + } } diff --git a/test/mjsunit/third_party/string-trim.js b/test/mjsunit/third_party/string-trim.js index 44a7a9d..234dff6 100644 --- a/test/mjsunit/third_party/string-trim.js +++ b/test/mjsunit/third_party/string-trim.js @@ -66,8 +66,7 @@ var whitespace = [ {s : '\u3000', t : 'IDEOGRAPHIC SPACE'}, {s : '\u2028', t : 'LINE SEPARATOR'}, {s : '\u2029', t : 'PARAGRAPH SEPARATOR'}, - // \u200B is not a whitespace character according to Unicode 6.3.0. - // {s : '\u200B', t : 'ZERO WIDTH SPACE (category Cf)'} + {s : '\u200B', t : 'ZERO WIDTH SPACE (category Cf)'} ]; for (var i = 0; i < whitespace.length; i++) { diff --git a/test/mjsunit/whitespaces.js b/test/mjsunit/whitespaces.js deleted file mode 100644 index 39891e7..0000000 --- a/test/mjsunit/whitespaces.js +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2014 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. 
-// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -var whitespaces = [ - // Whitespaces defined in ECMA-262 5.1, 7.2 - 0x0009, // Tab TAB - 0x000B, // Vertical Tab VT - 0x000C, // Form Feed FF - 0x0020, // Space SP - 0x00A0, // No-break space NBSP - 0xFEFF, // Byte Order Mark BOM - // Unicode whitespaces - 0x000A, // Line Feed LF - 0x000D, // Carriage Return CR - 0x0085, // Next Line NEL - 0x1680, // Ogham Space Mark - 0x180E, // Mongolian Vowel Separator - 0x2000, // EN QUAD - 0x2001, // EM QUAD - 0x2002, // EN SPACE - 0x2003, // EM SPACE - 0x2004, // THREE-PER-EM SPACE - 0x2005, // FOUR-PER-EM SPACE - 0x2006, // SIX-PER-EM SPACE - 0x2007, // FIGURE SPACE - 0x2008, // PUNCTUATION SPACE - 0x2009, // THIN SPACE - 0x200A, // HAIR SPACE - 0x2028, // LINE SEPARATOR - 0x2029, // PARAGRAPH SEPARATOR - 0x202F, // NARROW NO-BREAK SPACE - 0x205F, // MEDIUM MATHEMATICAL SPACE - 0x3000, // IDEOGRAPHIC SPACE -]; - -// Add single twobyte char to force twobyte representation. 
-// Interestingly, snowman is not "white" space :) -var twobyte = "\u2603"; -var onebyte = "\u007E"; -var twobytespace = "\u2000"; -var onebytespace = "\u0020"; - -function is_whitespace(c) { - return whitespaces.indexOf(c.charCodeAt(0)) > -1; -} - -function test_regexp(str) { - var pos_match = str.match(/\s/); - var neg_match = str.match(/\S/); - var test_char = str[0]; - var postfix = str[1]; - if (is_whitespace(test_char)) { - assertEquals(test_char, pos_match[0]); - assertEquals(postfix, neg_match[0]); - } else { - assertEquals(test_char, neg_match[0]); - assertNull(pos_match); - } -} - -function test_trim(c, infix) { - var str = c + c + c + infix + c; - if (is_whitespace(c)) { - assertEquals(infix, str.trim()); - } else { - assertEquals(str, str.trim()); - } -} - -function test_parseInt(c, postfix) { - // Skip if prefix is a digit. - if (c >= "0" && c <= 9) return; - var str = c + c + "123" + postfix; - if (is_whitespace(c)) { - assertEquals(123, parseInt(str)); - } else { - assertEquals(NaN, parseInt(str)); - } -} - -function test_eval(c, content) { - if (!is_whitespace(c)) return; - var str = c + c + "'" + content + "'" + c + c; - assertEquals(content, eval(str)); -} - -function test_stringtonumber(c, postfix) { - // Skip if prefix is a digit. - if (c >= "0" && c <= 9) return; - var result = 1 + Number(c + "123" + c + postfix); - if (is_whitespace(c)) { - assertEquals(124, result); - } else { - assertEquals(NaN, result); - } -} - -for (var i = 0; i < 0x10000; i++) { - c = String.fromCharCode(i); - test_regexp(c + onebyte); - test_regexp(c + twobyte); - test_trim(c, onebyte + "trim"); - test_trim(c, twobyte + "trim"); - test_parseInt(c, onebyte); - test_parseInt(c, twobyte); - test_eval(c, onebyte); - test_eval(c, twobyte); - test_stringtonumber(c, onebytespace); - test_stringtonumber(c, twobytespace); -}