String#split is buggy
authorbarraclough@apple.com <barraclough@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 20 Sep 2011 01:41:42 +0000 (01:41 +0000)
committerbarraclough@apple.com <barraclough@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 20 Sep 2011 01:41:42 +0000 (01:41 +0000)
https://bugs.webkit.org/show_bug.cgi?id=68348

Reviewed by Sam Weinig.

Source/JavaScriptCore:

* runtime/StringPrototype.cpp:
(JSC::jsStringWithReuse):
    - added helper function to reuse original JSString value.
(JSC::stringProtoFuncSplit):
    - Rewritten from the spec.
* tests/mozilla/ecma/String/15.5.4.8-2.js:
(getTestCases):
    - This test is not ES5 compliant.

LayoutTests:

* fast/js/script-tests/string-split-conformance.js: Added.
* fast/js/string-split-conformance-expected.txt: Added.
* fast/js/string-split-conformance.html: Added.
    - Added new Layout test based on:
        http://stevenlevithan.com/demo/split.cfm
* sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T6-expected.txt:
* sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T7-expected.txt:
* sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T8-expected.txt:
* sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T9-expected.txt:
* sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A2_T7-expected.txt:
    - Check in failing results for these 5 tests; they are all wrong
      (see https://bugs.ecmascript.org/show_bug.cgi?id=61).

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@95504 268f45cc-cd09-0410-ab3c-d52691b4dbfc

12 files changed:
LayoutTests/ChangeLog
LayoutTests/fast/js/script-tests/string-split-conformance.js [new file with mode: 0644]
LayoutTests/fast/js/string-split-conformance-expected.txt [new file with mode: 0644]
LayoutTests/fast/js/string-split-conformance.html [new file with mode: 0644]
LayoutTests/sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T6-expected.txt
LayoutTests/sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T7-expected.txt
LayoutTests/sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T8-expected.txt
LayoutTests/sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T9-expected.txt
LayoutTests/sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A2_T7-expected.txt
Source/JavaScriptCore/ChangeLog
Source/JavaScriptCore/runtime/StringPrototype.cpp
Source/JavaScriptCore/tests/mozilla/ecma/String/15.5.4.8-2.js

index 63df99f..0c5ac9c 100644 (file)
@@ -1,3 +1,23 @@
+2011-09-19  Gavin Barraclough  <barraclough@apple.com>
+
+        String#split is buggy
+        https://bugs.webkit.org/show_bug.cgi?id=68348
+
+        Reviewed by Sam Weinig.
+
+        * fast/js/script-tests/string-split-conformance.js: Added.
+        * fast/js/string-split-conformance-expected.txt: Added.
+        * fast/js/string-split-conformance.html: Added.
+            - Added new Layout test based on:
+                http://stevenlevithan.com/demo/split.cfm
+        * sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T6-expected.txt:
+        * sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T7-expected.txt:
+        * sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T8-expected.txt:
+        * sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A1_T9-expected.txt:
+        * sputnik/Conformance/15_Native_Objects/15.5_String/15.5.4/15.5.4.14_String.prototype.split/S15.5.4.14_A2_T7-expected.txt:
+            - Check in failing results for these 5 tests; they are all wrong
+              (see https://bugs.ecmascript.org/show_bug.cgi?id=61).
+
 2011-09-19  Sheriff Bot  <webkit.review.bot@gmail.com>
 
         Unreviewed, rolling out r95482.
diff --git a/LayoutTests/fast/js/script-tests/string-split-conformance.js b/LayoutTests/fast/js/script-tests/string-split-conformance.js
new file mode 100644 (file)
index 0000000..986ca75
--- /dev/null
@@ -0,0 +1,75 @@
+description(
+'This test checks for a regression against <a href="https://bugs.webkit.org/show_bug.cgi?id=68348">String#split is buggy</a>.'
+);
+
+// The following JavaScript code (including "".split tests) is copyright by Steven Levithan,
+// and released under the MIT License
+var testCode = [
+    ["''.split()",                     [""]],
+    ["''.split(/./)",          [""]],
+    ["''.split(/.?/)",         []],
+    ["''.split(/.??/)",                []],
+    ["'ab'.split(/a*/)",               ["", "b"]],
+    ["'ab'.split(/a*?/)",              ["a", "b"]],
+    ["'ab'.split(/(?:ab)/)",   ["", ""]],
+    ["'ab'.split(/(?:ab)*/)",  ["", ""]],
+    ["'ab'.split(/(?:ab)*?/)", ["a", "b"]],
+    ["'test'.split('')",               ["t", "e", "s", "t"]],
+    ["'test'.split()",         ["test"]],
+    ["'111'.split(1)",         ["", "", "", ""]],
+    ["'test'.split(/(?:)/, 2)",                ["t", "e"]],
+    ["'test'.split(/(?:)/, -1)",       ["t", "e", "s", "t"]],
+    ["'test'.split(/(?:)/, undefined)",        ["t", "e", "s", "t"]],
+    ["'test'.split(/(?:)/, null)",     []],
+    ["'test'.split(/(?:)/, NaN)",      []],
+    ["'test'.split(/(?:)/, true)",     ["t"]],
+    ["'test'.split(/(?:)/, '2')",      ["t", "e"]],
+    ["'test'.split(/(?:)/, 'two')",    []],
+    ["'a'.split(/-/)",         ["a"]],
+    ["'a'.split(/-?/)",                ["a"]],
+    ["'a'.split(/-??/)",               ["a"]],
+    ["'a'.split(/a/)",         ["", ""]],
+    ["'a'.split(/a?/)",                ["", ""]],
+    ["'a'.split(/a??/)",               ["a"]],
+    ["'ab'.split(/-/)",                ["ab"]],
+    ["'ab'.split(/-?/)",               ["a", "b"]],
+    ["'ab'.split(/-??/)",              ["a", "b"]],
+    ["'a-b'.split(/-/)",               ["a", "b"]],
+    ["'a-b'.split(/-?/)",              ["a", "b"]],
+    ["'a-b'.split(/-??/)",             ["a", "-", "b"]],
+    ["'a--b'.split(/-/)",              ["a", "", "b"]],
+    ["'a--b'.split(/-?/)",             ["a", "", "b"]],
+    ["'a--b'.split(/-??/)",            ["a", "-", "-", "b"]],
+    ["''.split(/()()/)",               []],
+    ["'.'.split(/()()/)",              ["."]],
+    ["'.'.split(/(.?)(.?)/)",  ["", ".", "", ""]],
+    ["'.'.split(/(.??)(.??)/)",        ["."]],
+    ["'.'.split(/(.)?(.)?/)",  ["", ".", undefined, ""]],
+    ["'A<B>bold</B>and<CODE>coded</CODE>'.split(ecmaSampleRe)", ["A", undefined, "B", "bold", "/", "B", "and", undefined, "CODE", "coded", "/", "CODE", ""]],
+    ["'tesst'.split(/(s)*/)",  ["t", undefined, "e", "s", "t"]],
+    ["'tesst'.split(/(s)*?/)", ["t", undefined, "e", undefined, "s", undefined, "s", undefined, "t"]],
+    ["'tesst'.split(/(s*)/)",  ["t", "", "e", "ss", "t"]],
+    ["'tesst'.split(/(s*?)/)", ["t", "", "e", "", "s", "", "s", "", "t"]],
+    ["'tesst'.split(/(?:s)*/)",        ["t", "e", "t"]],
+    ["'tesst'.split(/(?=s+)/)",        ["te", "s", "st"]],
+    ["'test'.split('t')",              ["", "es", ""]],
+    ["'test'.split('es')",             ["t", "t"]],
+    ["'test'.split(/t/)",              ["", "es", ""]],
+    ["'test'.split(/es/)",             ["t", "t"]],
+    ["'test'.split(/(t)/)",            ["", "t", "es", "t", ""]],
+    ["'test'.split(/(es)/)",   ["t", "es", "t"]],
+    ["'test'.split(/(t)(e)(s)(t)/)",["", "t", "e", "s", "t", ""]],
+    ["'.'.split(/(((.((.??)))))/)",    ["", ".", ".", ".", "", "", ""]],
+    ["'.'.split(/(((((.??)))))/)",     ["."]]
+];
+var ecmaSampleRe = /<(\/)?([^<>]+)>/;
+
+for (var i in testCode)
+    shouldBe(testCode[i][0], 'testCode[i][1]');
+
+// Check that split works with object separators, and that steps 8 & 9 are performed in the correct order.
+var separatorToStringCalled = "ToString not called on the separator";
+shouldBe("'hello'.split({toString:function(){return 'e';}})", '["h", "llo"]');
+shouldBe("var a = 'hello'.split({toString:function(){separatorToStringCalled='OKAY';return 'e';}},0); a.push(separatorToStringCalled); a", '["OKAY"]');
+
+var successfullyParsed = true;
diff --git a/LayoutTests/fast/js/string-split-conformance-expected.txt b/LayoutTests/fast/js/string-split-conformance-expected.txt
new file mode 100644 (file)
index 0000000..cb0ba26
--- /dev/null
@@ -0,0 +1,67 @@
+This test checks for a regression against String#split is buggy.
+
+On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
+
+
+PASS ''.split() is testCode[i][1]
+PASS ''.split(/./) is testCode[i][1]
+PASS ''.split(/.?/) is testCode[i][1]
+PASS ''.split(/.??/) is testCode[i][1]
+PASS 'ab'.split(/a*/) is testCode[i][1]
+PASS 'ab'.split(/a*?/) is testCode[i][1]
+PASS 'ab'.split(/(?:ab)/) is testCode[i][1]
+PASS 'ab'.split(/(?:ab)*/) is testCode[i][1]
+PASS 'ab'.split(/(?:ab)*?/) is testCode[i][1]
+PASS 'test'.split('') is testCode[i][1]
+PASS 'test'.split() is testCode[i][1]
+PASS '111'.split(1) is testCode[i][1]
+PASS 'test'.split(/(?:)/, 2) is testCode[i][1]
+PASS 'test'.split(/(?:)/, -1) is testCode[i][1]
+PASS 'test'.split(/(?:)/, undefined) is testCode[i][1]
+PASS 'test'.split(/(?:)/, null) is testCode[i][1]
+PASS 'test'.split(/(?:)/, NaN) is testCode[i][1]
+PASS 'test'.split(/(?:)/, true) is testCode[i][1]
+PASS 'test'.split(/(?:)/, '2') is testCode[i][1]
+PASS 'test'.split(/(?:)/, 'two') is testCode[i][1]
+PASS 'a'.split(/-/) is testCode[i][1]
+PASS 'a'.split(/-?/) is testCode[i][1]
+PASS 'a'.split(/-??/) is testCode[i][1]
+PASS 'a'.split(/a/) is testCode[i][1]
+PASS 'a'.split(/a?/) is testCode[i][1]
+PASS 'a'.split(/a??/) is testCode[i][1]
+PASS 'ab'.split(/-/) is testCode[i][1]
+PASS 'ab'.split(/-?/) is testCode[i][1]
+PASS 'ab'.split(/-??/) is testCode[i][1]
+PASS 'a-b'.split(/-/) is testCode[i][1]
+PASS 'a-b'.split(/-?/) is testCode[i][1]
+PASS 'a-b'.split(/-??/) is testCode[i][1]
+PASS 'a--b'.split(/-/) is testCode[i][1]
+PASS 'a--b'.split(/-?/) is testCode[i][1]
+PASS 'a--b'.split(/-??/) is testCode[i][1]
+PASS ''.split(/()()/) is testCode[i][1]
+PASS '.'.split(/()()/) is testCode[i][1]
+PASS '.'.split(/(.?)(.?)/) is testCode[i][1]
+PASS '.'.split(/(.??)(.??)/) is testCode[i][1]
+PASS '.'.split(/(.)?(.)?/) is testCode[i][1]
+PASS 'A<B>bold</B>and<CODE>coded</CODE>'.split(ecmaSampleRe) is testCode[i][1]
+PASS 'tesst'.split(/(s)*/) is testCode[i][1]
+PASS 'tesst'.split(/(s)*?/) is testCode[i][1]
+PASS 'tesst'.split(/(s*)/) is testCode[i][1]
+PASS 'tesst'.split(/(s*?)/) is testCode[i][1]
+PASS 'tesst'.split(/(?:s)*/) is testCode[i][1]
+PASS 'tesst'.split(/(?=s+)/) is testCode[i][1]
+PASS 'test'.split('t') is testCode[i][1]
+PASS 'test'.split('es') is testCode[i][1]
+PASS 'test'.split(/t/) is testCode[i][1]
+PASS 'test'.split(/es/) is testCode[i][1]
+PASS 'test'.split(/(t)/) is testCode[i][1]
+PASS 'test'.split(/(es)/) is testCode[i][1]
+PASS 'test'.split(/(t)(e)(s)(t)/) is testCode[i][1]
+PASS '.'.split(/(((.((.??)))))/) is testCode[i][1]
+PASS '.'.split(/(((((.??)))))/) is testCode[i][1]
+PASS 'hello'.split({toString:function(){return 'e';}}) is ["h", "llo"]
+PASS var a = 'hello'.split({toString:function(){separatorToStringCalled='OKAY';return 'e';}},0); a.push(separatorToStringCalled); a is ["OKAY"]
+PASS successfullyParsed is true
+
+TEST COMPLETE
+
diff --git a/LayoutTests/fast/js/string-split-conformance.html b/LayoutTests/fast/js/string-split-conformance.html
new file mode 100644 (file)
index 0000000..65eb89e
--- /dev/null
@@ -0,0 +1,13 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html>
+<head>
+<link rel="stylesheet" href="resources/js-test-style.css">
+<script src="resources/js-test-pre.js"></script>
+</head>
+<body>
+<p id="description"></p>
+<div id="console"></div>
+<script src="script-tests/string-split-conformance.js"></script>
+<script src="resources/js-test-post.js"></script>
+</body>
+</html>
index acf2e0a..f17e01f 100644 (file)
@@ -1,6 +1,6 @@
 S15.5.4.14_A2_T7
 
-PASS 
+FAIL SputnikError: #2: var __string = "thisundefinedisundefinedaundefinedstringundefinedobject"; var __expected = ["this", "is", "a", "string", "object"]; __split = __string.split(void 0); __split.length === __expected.length. Actual: 1
 
 TEST COMPLETE
 
index 81a493e..35c9570 100644 (file)
@@ -1,3 +1,19 @@
+2011-09-19  Gavin Barraclough  <barraclough@apple.com>
+
+        String#split is buggy
+        https://bugs.webkit.org/show_bug.cgi?id=68348
+
+        Reviewed by Sam Weinig.
+
+        * runtime/StringPrototype.cpp:
+        (JSC::jsStringWithReuse):
+            - added helper function to reuse original JSString value.
+        (JSC::stringProtoFuncSplit):
+            - Rewritten from the spec.
+        * tests/mozilla/ecma/String/15.5.4.8-2.js:
+        (getTestCases):
+            - This test is not ES5 compliant.
+
 2011-09-19  Geoffrey Garen  <ggaren@apple.com>
 
         Removed lots of friend declarations from JSCell, so we can more
index 8e19f01..ab90265 100644 (file)
@@ -157,6 +157,18 @@ bool StringPrototype::getOwnPropertyDescriptor(ExecState* exec, const Identifier
 
 // ------------------------------ Functions --------------------------
 
+// Helper for producing a JSString for 'string', where 'string' was been produced by
+// calling ToString on 'originalValue'. In cases where 'originalValue' already was a
+// string primitive we can just use this, otherwise we need to allocate a new JSString.
+static inline JSString* jsStringWithReuse(ExecState* exec, JSValue originalValue, const UString& string)
+{
+    if (originalValue.isString()) {
+        ASSERT(asString(originalValue)->value(exec) == string);
+        return asString(originalValue);
+    }
+    return jsString(exec, string);
+}
+
 static NEVER_INLINE UString substituteBackreferencesSlow(const UString& replacement, const UString& source, const int* ovector, RegExp* reg, size_t i)
 {
     Vector<UChar> substitutedReplacement;
@@ -779,69 +791,189 @@ EncodedJSValue JSC_HOST_CALL stringProtoFuncSlice(ExecState* exec)
     return JSValue::encode(jsEmptyString(exec));
 }
 
+// ES 5.1 - 15.5.4.14 String.prototype.split (separator, limit)
 EncodedJSValue JSC_HOST_CALL stringProtoFuncSplit(ExecState* exec)
 {
+    // 1. Call CheckObjectCoercible passing the this value as its argument.
     JSValue thisValue = exec->hostThisValue();
-    if (thisValue.isUndefinedOrNull()) // CheckObjectCoercible
+    if (thisValue.isUndefinedOrNull())
         return throwVMTypeError(exec);
-    UString s = thisValue.toString(exec);
-    JSGlobalData* globalData = &exec->globalData();
 
-    JSValue a0 = exec->argument(0);
-    JSValue a1 = exec->argument(1);
+    // 2. Let S be the result of calling ToString, giving it the this value as its argument.
+    // 6. Let s be the number of characters in S.
+    UString input = thisValue.toString(exec);
 
+    // 3. Let A be a new array created as if by the expression new Array()
+    //    where Array is the standard built-in constructor with that name.
     JSArray* result = constructEmptyArray(exec);
-    unsigned i = 0;
-    unsigned p0 = 0;
-    unsigned limit = a1.isUndefined() ? 0xFFFFFFFFU : a1.toUInt32(exec);
-    if (a0.inherits(&RegExpObject::s_info)) {
-        RegExp* reg = asRegExpObject(a0)->regExp();
-        if (s.isEmpty() && reg->match(*globalData, s, 0) >= 0) {
-            // empty string matched by regexp -> empty array
+
+    // 4. Let lengthA be 0.
+    unsigned resultLength = 0;
+
+    // 5. If limit is undefined, let lim = 2^32-1; else let lim = ToUint32(limit).
+    JSValue limitValue = exec->argument(1);
+    unsigned limit = limitValue.isUndefined() ? 0xFFFFFFFFu : limitValue.toUInt32(exec);
+
+    // 7. Let p = 0.
+    size_t position = 0;
+
+    // 8. If separator is a RegExp object (its [[Class]] is "RegExp"), let R = separator;
+    //    otherwise let R = ToString(separator).
+    JSValue separatorValue = exec->argument(0);
+    if (separatorValue.inherits(&RegExpObject::s_info)) {
+        JSGlobalData* globalData = &exec->globalData();
+        RegExp* reg = asRegExpObject(separatorValue)->regExp();
+
+        // 9. If lim == 0, return A.
+        if (!limit)
+            return JSValue::encode(result);
+
+        // 10. If separator is undefined, then
+        if (separatorValue.isUndefined()) {
+            // a.  Call the [[DefineOwnProperty]] internal method of A with arguments "0",
+            //     Property Descriptor {[[Value]]: S, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true}, and false.
+            result->put(exec, 0, jsStringWithReuse(exec, thisValue, input));
+            // b.  Return A.
             return JSValue::encode(result);
         }
-        unsigned pos = 0;
-        while (i != limit && pos < s.length()) {
+
+        // 11. If s == 0, then
+        if (input.isEmpty()) {
+            // a. Call SplitMatch(S, 0, R) and let z be its MatchResult result.
+            // b. If z is not failure, return A.
+            // c. Call the [[DefineOwnProperty]] internal method of A with arguments "0",
+            //    Property Descriptor {[[Value]]: S, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true}, and false.
+            // d. Return A.
+            if (reg->match(*globalData, input, 0) < 0)
+                result->put(exec, 0, jsStringWithReuse(exec, thisValue, input));
+            return JSValue::encode(result);
+        }
+
+        // 12. Let q = p.
+        size_t matchPosition = 0;
+        // 13. Repeat, while q != s
+        while (matchPosition < input.length()) {
+            // a. Call SplitMatch(S, q, R) and let z be its MatchResult result.
             Vector<int, 32> ovector;
-            int mpos = reg->match(*globalData, s, pos, &ovector);
+            int mpos = reg->match(*globalData, input, matchPosition, &ovector);
+            // b. If z is failure, then let q = q + 1.
             if (mpos < 0)
                 break;
-            int mlen = ovector[1] - ovector[0];
-            pos = mpos + (mlen == 0 ? 1 : mlen);
-            if (static_cast<unsigned>(mpos) != p0 || mlen) {
-                result->put(exec, i++, jsSubstring(exec, s, p0, mpos - p0));
-                p0 = mpos + mlen;
+            matchPosition = mpos;
+
+            // c. Else, z is not failure
+            // i. z must be a State. Let e be z's endIndex and let cap be z's captures array.
+            size_t matchEnd = ovector[1];
+
+            // ii. If e == p, then let q = q + 1.
+            if (matchEnd == position) {
+                ++matchPosition;
+                continue;
             }
-            for (unsigned si = 1; si <= reg->numSubpatterns(); ++si) {
-                int spos = ovector[si * 2];
-                if (spos < 0)
-                    result->put(exec, i++, jsUndefined());
-                else
-                    result->put(exec, i++, jsSubstring(exec, s, spos, ovector[si * 2 + 1] - spos));
+            // iii. Else, e != p
+
+            // 1. Let T be a String value equal to the substring of S consisting of the characters at positions p (inclusive)
+            //    through q (exclusive).
+            // 2. Call the [[DefineOwnProperty]] internal method of A with arguments ToString(lengthA),
+            //    Property Descriptor {[[Value]]: T, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true}, and false.
+            result->put(exec, resultLength, jsSubstring(exec, input, position, matchPosition - position));
+            // 3. Increment lengthA by 1.
+            // 4. If lengthA == lim, return A.
+            if (++resultLength == limit)
+                return JSValue::encode(result);
+
+            // 5. Let p = e.
+            // 8. Let q = p.
+            position = matchEnd;
+            matchPosition = matchEnd;
+
+            // 6. Let i = 0.
+            // 7. Repeat, while i is not equal to the number of elements in cap.
+            //  a Let i = i + 1.
+            for (unsigned i = 1; i <= reg->numSubpatterns(); ++i) {
+                // b Call the [[DefineOwnProperty]] internal method of A with arguments
+                //   ToString(lengthA), Property Descriptor {[[Value]]: cap[i], [[Writable]]:
+                //   true, [[Enumerable]]: true, [[Configurable]]: true}, and false.
+                int sub = ovector[i * 2];
+                result->put(exec, resultLength, sub < 0 ? jsUndefined() : jsSubstring(exec, input, sub, ovector[i * 2 + 1] - sub));
+                // c Increment lengthA by 1.
+                // d If lengthA == lim, return A.
+                if (++resultLength == limit)
+                    return JSValue::encode(result);
             }
         }
     } else {
-        UString u2 = a0.toString(exec);
-        if (u2.isEmpty()) {
-            if (s.isEmpty()) {
-                // empty separator matches empty string -> empty array
+        UString separator = separatorValue.toString(exec);
+
+        // 9. If lim == 0, return A.
+        if (!limit)
+            return JSValue::encode(result);
+
+        // 10. If separator is undefined, then
+        JSValue separatorValue = exec->argument(0);
+        if (separatorValue.isUndefined()) {
+            // a.  Call the [[DefineOwnProperty]] internal method of A with arguments "0",
+            //     Property Descriptor {[[Value]]: S, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true}, and false.
+            result->put(exec, 0, jsStringWithReuse(exec, thisValue, input));
+            // b.  Return A.
+            return JSValue::encode(result);
+        }
+
+        // 11. If s == 0, then
+        if (input.isEmpty()) {
+            // a. Call SplitMatch(S, 0, R) and let z be its MatchResult result.
+            // b. If z is not failure, return A.
+            // c. Call the [[DefineOwnProperty]] internal method of A with arguments "0",
+            //    Property Descriptor {[[Value]]: S, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true}, and false.
+            // d. Return A.
+            if (!separator.isEmpty())
+                result->put(exec, 0, jsStringWithReuse(exec, thisValue, input));
+            return JSValue::encode(result);
+        }
+
+        // Optimized case for splitting on the empty string.
+        if (separator.isEmpty()) {
+            limit = std::min(limit, input.length());
+            // Zero limt/input length handled in steps 9/11 respectively, above.
+            ASSERT(limit);
+
+            do
+                result->put(exec, position, jsSingleCharacterSubstring(exec, input, position));
+            while (++position < limit);
+
+            return JSValue::encode(result);
+        }
+
+        // 12. Let q = p.
+        size_t matchPosition;
+        // 13. Repeat, while q != s
+        //   a. Call SplitMatch(S, q, R) and let z be its MatchResult result.
+        //   b. If z is failure, then let q = q+1.
+        //   c. Else, z is not failure
+        while ((matchPosition = input.find(separator, position)) != notFound) {
+            // 1. Let T be a String value equal to the substring of S consisting of the characters at positions p (inclusive)
+            //    through q (exclusive).
+            // 2. Call the [[DefineOwnProperty]] internal method of A with arguments ToString(lengthA),
+            //    Property Descriptor {[[Value]]: T, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true}, and false.
+            result->put(exec, resultLength, jsSubstring(exec, input, position, matchPosition - position));
+            // 3. Increment lengthA by 1.
+            // 4. If lengthA == lim, return A.
+            if (++resultLength == limit)
                 return JSValue::encode(result);
-            }
-            while (i != limit && p0 < s.length() - 1)
-                result->put(exec, i++, jsSingleCharacterSubstring(exec, s, p0++));
-        } else {
-            size_t pos;
-            while (i != limit && (pos = s.find(u2, p0)) != notFound) {
-                result->put(exec, i++, jsSubstring(exec, s, p0, pos - p0));
-                p0 = pos + u2.length();
-            }
+
+            // 5. Let p = e.
+            // 8. Let q = p.
+            position = matchPosition + separator.length();
         }
     }
 
-    // add remaining string
-    if (i != limit)
-        result->put(exec, i++, jsSubstring(exec, s, p0, s.length() - p0));
+    // 14. Let T be a String value equal to the substring of S consisting of the characters at positions p (inclusive)
+    //     through s (exclusive).
+    // 15. Call the [[DefineOwnProperty]] internal method of A with arguments ToString(lengthA), Property Descriptor
+    //     {[[Value]]: T, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true}, and false.
+    result->put(exec, resultLength++, jsSubstring(exec, input, position, input.length() - position));
 
+    // 16. Return A.
     return JSValue::encode(result);
 }
 
index 18e41cd..59a730c 100644 (file)
@@ -107,11 +107,11 @@ function getTestCases() {
                                     eval("var s = new String( TEST_STRING ); s.split('')["+i+"]") );
     }
 
-    // case where the value of the separator is undefined.  in this case. the value of the separator
-    // should be ToString( separator ), or "undefined".
+    // Case where the value of the separator is undefined.
+    // Per ES5 15.5.4.14 step 10 this returns the input (unless limit is non-zero).
 
     var TEST_STRING = "thisundefinedisundefinedaundefinedstringundefinedobject";
-    var EXPECT_STRING = new Array( "this", "is", "a", "string", "object" );
+    var EXPECT_STRING = new Array( "thisundefinedisundefinedaundefinedstringundefinedobject" );
 
     array[item++] = new TestCase(   SECTION,
                                     "var s = new String( "+ TEST_STRING +" ); s.split(void 0).length",