From: yangguo@chromium.org Date: Mon, 11 Mar 2013 11:52:11 +0000 (+0000) Subject: Fix white space matching in latin-1 strings wrt \u00a0. X-Git-Tag: upstream/4.7.83~14886 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b85237a0bc0baa980c8b24c781381321f82b161e;p=platform%2Fupstream%2Fv8.git Fix white space matching in latin-1 strings wrt \u00a0. R=dcarney@chromium.org BUG=181422 Review URL: https://chromiumcodereview.appspot.com/12644008 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13898 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/arm/regexp-macro-assembler-arm.cc b/src/arm/regexp-macro-assembler-arm.cc index acb24ef14..7e6c344c0 100644 --- a/src/arm/regexp-macro-assembler-arm.cc +++ b/src/arm/regexp-macro-assembler-arm.cc @@ -539,29 +539,23 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type, case 's': // Match space-characters if (mode_ == ASCII) { - // ASCII space characters are '\t'..'\r' and ' '. + // One byte space characters are '\t'..'\r', ' ' and \u00a0. Label success; __ cmp(current_character(), Operand(' ')); __ b(eq, &success); // Check range 0x09..0x0d __ sub(r0, current_character(), Operand('\t')); __ cmp(r0, Operand('\r' - '\t')); - BranchOrBacktrack(hi, on_no_match); + __ b(ls, &success); + // \u00a0 (NBSP). + __ cmp(r0, Operand(0x00a0 - '\t')); + BranchOrBacktrack(ne, on_no_match); __ bind(&success); return true; } return false; case 'S': - // Match non-space characters. - if (mode_ == ASCII) { - // ASCII space characters are '\t'..'\r' and ' '. - __ cmp(current_character(), Operand(' ')); - BranchOrBacktrack(eq, on_no_match); - __ sub(r0, current_character(), Operand('\t')); - __ cmp(r0, Operand('\r' - '\t')); - BranchOrBacktrack(ls, on_no_match); - return true; - } + // The emitted code for generic character classes is good enough. return false; case 'd': // Match ASCII digits ('0'..'9') diff --git a/src/ia32/regexp-macro-assembler-ia32.cc b/src/ia32/regexp-macro-assembler-ia32.cc index 49c75e133..bb0e0adb7 100644 --- a/src/ia32/regexp-macro-assembler-ia32.cc +++ b/src/ia32/regexp-macro-assembler-ia32.cc @@ -599,29 +599,23 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type, case 's': // Match space-characters if (mode_ == ASCII) { - // ASCII space characters are '\t'..'\r' and ' '. + // One byte space characters are '\t'..'\r', ' ' and \u00a0. Label success; __ cmp(current_character(), ' '); - __ j(equal, &success); + __ j(equal, &success, Label::kNear); // Check range 0x09..0x0d __ lea(eax, Operand(current_character(), -'\t')); __ cmp(eax, '\r' - '\t'); - BranchOrBacktrack(above, on_no_match); + __ j(below_equal, &success, Label::kNear); + // \u00a0 (NBSP). + __ cmp(eax, 0x00a0 - '\t'); + BranchOrBacktrack(not_equal, on_no_match); __ bind(&success); return true; } return false; case 'S': - // Match non-space characters. - if (mode_ == ASCII) { - // ASCII space characters are '\t'..'\r' and ' '. - __ cmp(current_character(), ' '); - BranchOrBacktrack(equal, on_no_match); - __ lea(eax, Operand(current_character(), -'\t')); - __ cmp(eax, '\r' - '\t'); - BranchOrBacktrack(below_equal, on_no_match); - return true; - } + // The emitted code for generic character classes is good enough. return false; case 'd': // Match ASCII digits ('0'..'9') diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc index f5b5e954a..c9871f0e4 100644 --- a/src/x64/regexp-macro-assembler-x64.cc +++ b/src/x64/regexp-macro-assembler-x64.cc @@ -640,29 +640,23 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, case 's': // Match space-characters if (mode_ == ASCII) { - // ASCII space characters are '\t'..'\r' and ' '. + // One byte space characters are '\t'..'\r', ' ' and \u00a0. Label success; __ cmpl(current_character(), Immediate(' ')); - __ j(equal, &success); + __ j(equal, &success, Label::kNear); // Check range 0x09..0x0d __ lea(rax, Operand(current_character(), -'\t')); __ cmpl(rax, Immediate('\r' - '\t')); - BranchOrBacktrack(above, on_no_match); + __ j(below_equal, &success, Label::kNear); + // \u00a0 (NBSP). + __ cmpl(rax, Immediate(0x00a0 - '\t')); + BranchOrBacktrack(not_equal, on_no_match); __ bind(&success); return true; } return false; case 'S': - // Match non-space characters. - if (mode_ == ASCII) { - // ASCII space characters are '\t'..'\r' and ' '. - __ cmpl(current_character(), Immediate(' ')); - BranchOrBacktrack(equal, on_no_match); - __ lea(rax, Operand(current_character(), -'\t')); - __ cmpl(rax, Immediate('\r' - '\t')); - BranchOrBacktrack(below_equal, on_no_match); - return true; - } + // The emitted code for generic character classes is good enough. return false; case 'd': // Match ASCII digits ('0'..'9') diff --git a/test/mjsunit/regress/regress-crbug-181422.js b/test/mjsunit/regress/regress-crbug-181422.js new file mode 100644 index 000000000..52826f311 --- /dev/null +++ b/test/mjsunit/regress/regress-crbug-181422.js @@ -0,0 +1,32 @@ +// Copyright 2013 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +assertArrayEquals(["\u00a0"], "ab\u00a0cd".match(/\s/)); +assertArrayEquals(["a", "b", "c", "d"], "ab\u00a0cd".match(/\S/g)); + +assertArrayEquals(["\u00a0"], "\u2604b\u00a0cd".match(/\s/)); +assertArrayEquals(["\u2604", "b", "c", "d"], "\u2604b\u00a0cd".match(/\S/g));