tizen beta release
[framework/web/webkit-efl.git] / Source / JavaScriptCore / yarr / YarrInterpreter.cpp
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "YarrInterpreter.h"
29
30 #include "UString.h"
31 #include "Yarr.h"
32 #include <wtf/BumpPointerAllocator.h>
33 #include <wtf/text/CString.h>
34
35 #ifndef NDEBUG
36 #include <stdio.h>
37 #endif
38
39 using namespace WTF;
40
41 namespace JSC { namespace Yarr {
42
43 class Interpreter {
44 public:
45     struct ParenthesesDisjunctionContext;
46
47     struct BackTrackInfoPatternCharacter {
48         uintptr_t matchAmount;
49     };
50     struct BackTrackInfoCharacterClass {
51         uintptr_t matchAmount;
52     };
53     struct BackTrackInfoBackReference {
54         uintptr_t begin; // Not really needed for greedy quantifiers.
55         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
56     };
57     struct BackTrackInfoAlternative {
58         uintptr_t offset;
59     };
60     struct BackTrackInfoParentheticalAssertion {
61         uintptr_t begin;
62     };
63     struct BackTrackInfoParenthesesOnce {
64         uintptr_t begin;
65     };
66     struct BackTrackInfoParenthesesTerminal {
67         uintptr_t begin;
68     };
69     struct BackTrackInfoParentheses {
70         uintptr_t matchAmount;
71         ParenthesesDisjunctionContext* lastContext;
72     };
73
74     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
75     {
76         context->next = backTrack->lastContext;
77         backTrack->lastContext = context;
78         ++backTrack->matchAmount;
79     }
80
81     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
82     {
83         ASSERT(backTrack->matchAmount);
84         ASSERT(backTrack->lastContext);
85         backTrack->lastContext = backTrack->lastContext->next;
86         --backTrack->matchAmount;
87     }
88
89     struct DisjunctionContext
90     {
91         DisjunctionContext()
92             : term(0)
93         {
94         }
95
96         void* operator new(size_t, void* where)
97         {
98             return where;
99         }
100
101         int term;
102         unsigned matchBegin;
103         unsigned matchEnd;
104         uintptr_t frame[1];
105     };
106
107     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
108     {
109         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
110         allocatorPool = allocatorPool->ensureCapacity(size);
111         if (!allocatorPool)
112             CRASH();
113         return new(allocatorPool->alloc(size)) DisjunctionContext();
114     }
115
116     void freeDisjunctionContext(DisjunctionContext* context)
117     {
118         allocatorPool = allocatorPool->dealloc(context);
119     }
120
121     struct ParenthesesDisjunctionContext
122     {
123         ParenthesesDisjunctionContext(int* output, ByteTerm& term)
124             : next(0)
125         {
126             unsigned firstSubpatternId = term.atom.subpatternId;
127             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
128
129             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
130                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
131                 output[(firstSubpatternId << 1) + i] = -1;
132             }
133
134             new(getDisjunctionContext(term)) DisjunctionContext();
135         }
136
137         void* operator new(size_t, void* where)
138         {
139             return where;
140         }
141
142         void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
143         {
144             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
145                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
146         }
147
148         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
149         {
150             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
151         }
152
153         ParenthesesDisjunctionContext* next;
154         int subpatternBackup[1];
155     };
156
157     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
158     {
159         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
160         allocatorPool = allocatorPool->ensureCapacity(size);
161         if (!allocatorPool)
162             CRASH();
163         return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
164     }
165
166     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
167     {
168         allocatorPool = allocatorPool->dealloc(context);
169     }
170
171     // This class is a placeholder for future character iterator, current 
172     // proposed name StringConstCharacterIterator.
173     class CharAccess {
174     public:
175         CharAccess(const UString& s)
176         {
177             if (s.is8Bit()) {
178                 m_charSize = Char8;
179                 m_ptr.ptr8 = s.characters8();
180             } else {
181                 m_charSize = Char16;
182                 m_ptr.ptr16 = s.characters16();
183             }
184         }
185
186         CharAccess(const LChar* ptr)
187             : m_charSize(Char8)
188         {
189             m_ptr.ptr8 = ptr;
190         }
191
192         CharAccess(const UChar* ptr)
193             : m_charSize(Char16)
194         {
195             m_ptr.ptr16 = ptr;
196         }
197
198         ~CharAccess()
199         {
200         }
201
202         inline UChar operator[](unsigned index)
203         {
204             if (m_charSize == Char8)
205                 return m_ptr.ptr8[index];
206             return m_ptr.ptr16[index];
207         }
208
209     private:
210         union {
211             const LChar* ptr8;
212             const UChar* ptr16;
213         } m_ptr;
214         YarrCharSize m_charSize;
215     };
216
217     class InputStream {
218     public:
219         InputStream(const UString& input, unsigned start, unsigned length)
220             : input(input)
221             , pos(start)
222             , length(length)
223         {
224         }
225
226         void next()
227         {
228             ++pos;
229         }
230
231         void rewind(unsigned amount)
232         {
233             ASSERT(pos >= amount);
234             pos -= amount;
235         }
236
237         int read()
238         {
239             ASSERT(pos < length);
240             if (pos < length)
241                 return input[pos];
242             return -1;
243         }
244
245         int readPair()
246         {
247             ASSERT(pos + 1 < length);
248             return input[pos] | input[pos + 1] << 16;
249         }
250
251         int readChecked(int position)
252         {
253             ASSERT(position < 0);
254             ASSERT(static_cast<unsigned>(-position) <= pos);
255             unsigned p = pos + position;
256             ASSERT(p < length);
257             return input[p];
258         }
259
260         int reread(unsigned from)
261         {
262             ASSERT(from < length);
263             return input[from];
264         }
265
266         int prev()
267         {
268             ASSERT(!(pos > length));
269             if (pos && length)
270                 return input[pos - 1];
271             return -1;
272         }
273
274         unsigned getPos()
275         {
276             return pos;
277         }
278
279         void setPos(unsigned p)
280         {
281             pos = p;
282         }
283
284         bool atStart()
285         {
286             return pos == 0;
287         }
288
289         bool atEnd()
290         {
291             return pos == length;
292         }
293
294         unsigned end()
295         {
296             return length;
297         }
298
299         bool checkInput(int count)
300         {
301             if ((pos + count) <= length) {
302                 pos += count;
303                 return true;
304             }
305             return false;
306         }
307
308         void uncheckInput(int count)
309         {
310             pos -= count;
311         }
312
313         bool atStart(int position)
314         {
315             return (pos + position) == 0;
316         }
317
318         bool atEnd(int position)
319         {
320             return (pos + position) == length;
321         }
322
323         bool isNotAvailableInput(int position)
324         {
325             return (pos + position) > length;
326         }
327
328     private:
329         CharAccess input;
330         unsigned pos;
331         unsigned length;
332     };
333
334     bool testCharacterClass(CharacterClass* characterClass, int ch)
335     {
336         if (ch & 0xFF80) {
337             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
338                 if (ch == characterClass->m_matchesUnicode[i])
339                     return true;
340             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
341                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
342                     return true;
343         } else {
344             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
345                 if (ch == characterClass->m_matches[i])
346                     return true;
347             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
348                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
349                     return true;
350         }
351
352         return false;
353     }
354
355     bool checkCharacter(int testChar, int inputPosition)
356     {
357         return testChar == input.readChecked(inputPosition);
358     }
359
360     bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
361     {
362         int ch = input.readChecked(inputPosition);
363         return (loChar == ch) || (hiChar == ch);
364     }
365
366     bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
367     {
368         bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
369         return invert ? !match : match;
370     }
371
372     bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
373     {
374         int matchSize = matchEnd - matchBegin;
375
376         if (!input.checkInput(matchSize))
377             return false;
378
379         if (pattern->m_ignoreCase) {
380             for (int i = 0; i < matchSize; ++i) {
381                 int ch = input.reread(matchBegin + i);
382
383                 int lo = Unicode::toLower(ch);
384                 int hi = Unicode::toUpper(ch);
385
386                 if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) {
387                     input.uncheckInput(matchSize);
388                     return false;
389                 }
390             }
391         } else {
392             for (int i = 0; i < matchSize; ++i) {
393                 if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
394                     input.uncheckInput(matchSize);
395                     return false;
396                 }
397             }
398         }
399
400         return true;
401     }
402
403     bool matchAssertionBOL(ByteTerm& term)
404     {
405         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
406     }
407
408     bool matchAssertionEOL(ByteTerm& term)
409     {
410         if (term.inputPosition)
411             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
412
413         return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
414     }
415
416     bool matchAssertionWordBoundary(ByteTerm& term)
417     {
418         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
419         bool readIsWordchar;
420         if (term.inputPosition)
421             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
422         else
423             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
424
425         bool wordBoundary = prevIsWordchar != readIsWordchar;
426         return term.invert() ? !wordBoundary : wordBoundary;
427     }
428
429     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
430     {
431         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
432
433         switch (term.atom.quantityType) {
434         case QuantifierFixedCount:
435             break;
436
437         case QuantifierGreedy:
438             if (backTrack->matchAmount) {
439                 --backTrack->matchAmount;
440                 input.uncheckInput(1);
441                 return true;
442             }
443             break;
444
445         case QuantifierNonGreedy:
446             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
447                 ++backTrack->matchAmount;
448                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
449                     return true;
450             }
451             input.uncheckInput(backTrack->matchAmount);
452             break;
453         }
454
455         return false;
456     }
457
458     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
459     {
460         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
461
462         switch (term.atom.quantityType) {
463         case QuantifierFixedCount:
464             break;
465
466         case QuantifierGreedy:
467             if (backTrack->matchAmount) {
468                 --backTrack->matchAmount;
469                 input.uncheckInput(1);
470                 return true;
471             }
472             break;
473
474         case QuantifierNonGreedy:
475             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
476                 ++backTrack->matchAmount;
477                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
478                     return true;
479             }
480             input.uncheckInput(backTrack->matchAmount);
481             break;
482         }
483
484         return false;
485     }
486
487     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
488     {
489         ASSERT(term.type == ByteTerm::TypeCharacterClass);
490         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
491
492         switch (term.atom.quantityType) {
493         case QuantifierFixedCount: {
494             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
495                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
496                     return false;
497             }
498             return true;
499         }
500
501         case QuantifierGreedy: {
502             unsigned matchAmount = 0;
503             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
504                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
505                     input.uncheckInput(1);
506                     break;
507                 }
508                 ++matchAmount;
509             }
510             backTrack->matchAmount = matchAmount;
511
512             return true;
513         }
514
515         case QuantifierNonGreedy:
516             backTrack->matchAmount = 0;
517             return true;
518         }
519
520         ASSERT_NOT_REACHED();
521         return false;
522     }
523
524     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
525     {
526         ASSERT(term.type == ByteTerm::TypeCharacterClass);
527         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
528
529         switch (term.atom.quantityType) {
530         case QuantifierFixedCount:
531             break;
532
533         case QuantifierGreedy:
534             if (backTrack->matchAmount) {
535                 --backTrack->matchAmount;
536                 input.uncheckInput(1);
537                 return true;
538             }
539             break;
540
541         case QuantifierNonGreedy:
542             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
543                 ++backTrack->matchAmount;
544                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
545                     return true;
546             }
547             input.uncheckInput(backTrack->matchAmount);
548             break;
549         }
550
551         return false;
552     }
553
554     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
555     {
556         ASSERT(term.type == ByteTerm::TypeBackReference);
557         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
558
559         int matchBegin = output[(term.atom.subpatternId << 1)];
560         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
561
562         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
563         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
564         // Eg.: /(a\1)/
565         if (matchEnd == -1)
566             return true;
567
568         ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
569
570         if (matchBegin == matchEnd)
571             return true;
572
573         switch (term.atom.quantityType) {
574         case QuantifierFixedCount: {
575             backTrack->begin = input.getPos();
576             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
577                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
578                     input.setPos(backTrack->begin);
579                     return false;
580                 }
581             }
582             return true;
583         }
584
585         case QuantifierGreedy: {
586             unsigned matchAmount = 0;
587             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
588                 ++matchAmount;
589             backTrack->matchAmount = matchAmount;
590             return true;
591         }
592
593         case QuantifierNonGreedy:
594             backTrack->begin = input.getPos();
595             backTrack->matchAmount = 0;
596             return true;
597         }
598
599         ASSERT_NOT_REACHED();
600         return false;
601     }
602
603     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
604     {
605         ASSERT(term.type == ByteTerm::TypeBackReference);
606         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
607
608         int matchBegin = output[(term.atom.subpatternId << 1)];
609         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
610         ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
611
612         if (matchBegin == matchEnd)
613             return false;
614
615         switch (term.atom.quantityType) {
616         case QuantifierFixedCount:
617             // for quantityCount == 1, could rewind.
618             input.setPos(backTrack->begin);
619             break;
620
621         case QuantifierGreedy:
622             if (backTrack->matchAmount) {
623                 --backTrack->matchAmount;
624                 input.rewind(matchEnd - matchBegin);
625                 return true;
626             }
627             break;
628
629         case QuantifierNonGreedy:
630             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
631                 ++backTrack->matchAmount;
632                 return true;
633             }
634             input.setPos(backTrack->begin);
635             break;
636         }
637
638         return false;
639     }
640
641     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
642     {
643         if (term.capture()) {
644             unsigned subpatternId = term.atom.subpatternId;
645             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
646             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
647         }
648     }
649     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
650     {
651         unsigned firstSubpatternId = term.atom.subpatternId;
652         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
653         context->restoreOutput(output, firstSubpatternId, count);
654     }
655     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
656     {
657         while (backTrack->matchAmount) {
658             ParenthesesDisjunctionContext* context = backTrack->lastContext;
659
660             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
661             if (result == JSRegExpMatch)
662                 return JSRegExpMatch;
663
664             resetMatches(term, context);
665             popParenthesesDisjunctionContext(backTrack);
666             freeParenthesesDisjunctionContext(context);
667
668             if (result != JSRegExpNoMatch)
669                 return result;
670         }
671
672         return JSRegExpNoMatch;
673     }
674
675     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
676     {
677         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
678         ASSERT(term.atom.quantityCount == 1);
679
680         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
681
682         switch (term.atom.quantityType) {
683         case QuantifierGreedy: {
684             // set this speculatively; if we get to the parens end this will be true.
685             backTrack->begin = input.getPos();
686             break;
687         }
688         case QuantifierNonGreedy: {
689             backTrack->begin = notFound;
690             context->term += term.atom.parenthesesWidth;
691             return true;
692         }
693         case QuantifierFixedCount:
694             break;
695         }
696
697         if (term.capture()) {
698             unsigned subpatternId = term.atom.subpatternId;
699             output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
700         }
701
702         return true;
703     }
704
705     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
706     {
707         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
708         ASSERT(term.atom.quantityCount == 1);
709
710         if (term.capture()) {
711             unsigned subpatternId = term.atom.subpatternId;
712             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
713         }
714
715         if (term.atom.quantityType == QuantifierFixedCount)
716             return true;
717
718         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
719         return backTrack->begin != input.getPos();
720     }
721
722     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
723     {
724         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
725         ASSERT(term.atom.quantityCount == 1);
726
727         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
728
729         if (term.capture()) {
730             unsigned subpatternId = term.atom.subpatternId;
731             output[(subpatternId << 1)] = -1;
732             output[(subpatternId << 1) + 1] = -1;
733         }
734
735         switch (term.atom.quantityType) {
736         case QuantifierGreedy:
737             // if we backtrack to this point, there is another chance - try matching nothing.
738             ASSERT(backTrack->begin != notFound);
739             backTrack->begin = notFound;
740             context->term += term.atom.parenthesesWidth;
741             return true;
742         case QuantifierNonGreedy:
743             ASSERT(backTrack->begin != notFound);
744         case QuantifierFixedCount:
745             break;
746         }
747
748         return false;
749     }
750
751     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
752     {
753         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
754         ASSERT(term.atom.quantityCount == 1);
755
756         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
757
758         switch (term.atom.quantityType) {
759         case QuantifierGreedy:
760             if (backTrack->begin == notFound) {
761                 context->term -= term.atom.parenthesesWidth;
762                 return false;
763             }
764         case QuantifierNonGreedy:
765             if (backTrack->begin == notFound) {
766                 backTrack->begin = input.getPos();
767                 if (term.capture()) {
768                     // Technically this access to inputPosition should be accessing the begin term's
769                     // inputPosition, but for repeats other than fixed these values should be
770                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
771                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
772                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
773                     unsigned subpatternId = term.atom.subpatternId;
774                     output[subpatternId << 1] = input.getPos() + term.inputPosition;
775                 }
776                 context->term -= term.atom.parenthesesWidth;
777                 return true;
778             }
779         case QuantifierFixedCount:
780             break;
781         }
782
783         return false;
784     }
785
786     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
787     {
788         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
789         ASSERT(term.atom.quantityType == QuantifierGreedy);
790         ASSERT(term.atom.quantityCount == quantifyInfinite);
791         ASSERT(!term.capture());
792
793         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
794         backTrack->begin = input.getPos();
795         return true;
796     }
797
798     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
799     {
800         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
801
802         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
803         // Empty match is a failed match.
804         if (backTrack->begin == input.getPos())
805             return false;
806
807         // Successful match! Okay, what's next? - loop around and try to match moar!
808         context->term -= (term.atom.parenthesesWidth + 1);
809         return true;
810     }
811
812     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
813     {
814         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
815         ASSERT(term.atom.quantityType == QuantifierGreedy);
816         ASSERT(term.atom.quantityCount == quantifyInfinite);
817         ASSERT(!term.capture());
818
819         // If we backtrack to this point, we have failed to match this iteration of the parens.
820         // Since this is greedy / zero minimum a failed is also accepted as a match!
821         context->term += term.atom.parenthesesWidth;
822         return true;
823     }
824
825     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
826     {
827         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
828         // should always be returned as a successful match - we should never backtrack to here.
829         ASSERT_NOT_REACHED();
830         return false;
831     }
832
833     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
834     {
835         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
836         ASSERT(term.atom.quantityCount == 1);
837
838         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
839
840         backTrack->begin = input.getPos();
841         return true;
842     }
843
844     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
845     {
846         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
847         ASSERT(term.atom.quantityCount == 1);
848
849         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
850
851         input.setPos(backTrack->begin);
852
853         // We've reached the end of the parens; if they are inverted, this is failure.
854         if (term.invert()) {
855             context->term -= term.atom.parenthesesWidth;
856             return false;
857         }
858
859         return true;
860     }
861
862     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
863     {
864         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
865         ASSERT(term.atom.quantityCount == 1);
866
867         // We've failed to match parens; if they are inverted, this is win!
868         if (term.invert()) {
869             context->term += term.atom.parenthesesWidth;
870             return true;
871         }
872
873         return false;
874     }
875
876     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
877     {
878         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
879         ASSERT(term.atom.quantityCount == 1);
880
881         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
882
883         input.setPos(backTrack->begin);
884
885         context->term -= term.atom.parenthesesWidth;
886         return false;
887     }
888
889     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
890     {
891         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
892
893         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
894         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
895
896         backTrack->matchAmount = 0;
897         backTrack->lastContext = 0;
898
899         switch (term.atom.quantityType) {
900         case QuantifierFixedCount: {
901             // While we haven't yet reached our fixed limit,
902             while (backTrack->matchAmount < term.atom.quantityCount) {
903                 // Try to do a match, and it it succeeds, add it to the list.
904                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
905                 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
906                 if (result == JSRegExpMatch)
907                     appendParenthesesDisjunctionContext(backTrack, context);
908                 else {
909                     // The match failed; try to find an alternate point to carry on from.
910                     resetMatches(term, context);
911                     freeParenthesesDisjunctionContext(context);
912
913                     if (result != JSRegExpNoMatch)
914                         return result;
915                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
916                     if (backtrackResult != JSRegExpMatch)
917                         return backtrackResult;
918                 }
919             }
920
921             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
922             ParenthesesDisjunctionContext* context = backTrack->lastContext;
923             recordParenthesesMatch(term, context);
924             return JSRegExpMatch;
925         }
926
927         case QuantifierGreedy: {
928             while (backTrack->matchAmount < term.atom.quantityCount) {
929                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
930                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
931                 if (result == JSRegExpMatch)
932                     appendParenthesesDisjunctionContext(backTrack, context);
933                 else {
934                     resetMatches(term, context);
935                     freeParenthesesDisjunctionContext(context);
936
937                     if (result != JSRegExpNoMatch)
938                         return result;
939
940                     break;
941                 }
942             }
943
944             if (backTrack->matchAmount) {
945                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
946                 recordParenthesesMatch(term, context);
947             }
948             return JSRegExpMatch;
949         }
950
951         case QuantifierNonGreedy:
952             return JSRegExpMatch;
953         }
954
955         ASSERT_NOT_REACHED();
956         return JSRegExpErrorNoMatch;
957     }
958
959     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
960     //
961     // Greedy matches never should try just adding more - you should already have done
962     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
963     // you backtrack an item off the list needs checking, since we'll never have matched
964     // the one less case.  Tracking forwards, still add as much as possible.
965     //
966     // Non-greedy, we've already done the one less case, so don't match on popping.
967     // We haven't done the one more case, so always try to add that.
968     //
969     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
970     {
971         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
972
973         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
974         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
975
976         switch (term.atom.quantityType) {
977         case QuantifierFixedCount: {
978             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
979
980             ParenthesesDisjunctionContext* context = 0;
981             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
982
983             if (result != JSRegExpMatch)
984                 return result;
985
986             // While we haven't yet reached our fixed limit,
987             while (backTrack->matchAmount < term.atom.quantityCount) {
988                 // Try to do a match, and it it succeeds, add it to the list.
989                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
990                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
991
992                 if (result == JSRegExpMatch)
993                     appendParenthesesDisjunctionContext(backTrack, context);
994                 else {
995                     // The match failed; try to find an alternate point to carry on from.
996                     resetMatches(term, context);
997                     freeParenthesesDisjunctionContext(context);
998
999                     if (result != JSRegExpNoMatch)
1000                         return result;
1001                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1002                     if (backtrackResult != JSRegExpMatch)
1003                         return backtrackResult;
1004                 }
1005             }
1006
1007             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
1008             context = backTrack->lastContext;
1009             recordParenthesesMatch(term, context);
1010             return JSRegExpMatch;
1011         }
1012
1013         case QuantifierGreedy: {
1014             if (!backTrack->matchAmount)
1015                 return JSRegExpNoMatch;
1016
1017             ParenthesesDisjunctionContext* context = backTrack->lastContext;
1018             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1019             if (result == JSRegExpMatch) {
1020                 while (backTrack->matchAmount < term.atom.quantityCount) {
1021                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1022                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1023                     if (parenthesesResult == JSRegExpMatch)
1024                         appendParenthesesDisjunctionContext(backTrack, context);
1025                     else {
1026                         resetMatches(term, context);
1027                         freeParenthesesDisjunctionContext(context);
1028
1029                         if (parenthesesResult != JSRegExpNoMatch)
1030                             return parenthesesResult;
1031
1032                         break;
1033                     }
1034                 }
1035             } else {
1036                 resetMatches(term, context);
1037                 popParenthesesDisjunctionContext(backTrack);
1038                 freeParenthesesDisjunctionContext(context);
1039
1040                 if (result != JSRegExpNoMatch)
1041                     return result;
1042             }
1043
1044             if (backTrack->matchAmount) {
1045                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1046                 recordParenthesesMatch(term, context);
1047             }
1048             return JSRegExpMatch;
1049         }
1050
1051         case QuantifierNonGreedy: {
1052             // If we've not reached the limit, try to add one more match.
1053             if (backTrack->matchAmount < term.atom.quantityCount) {
1054                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1055                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1056                 if (result == JSRegExpMatch) {
1057                     appendParenthesesDisjunctionContext(backTrack, context);
1058                     recordParenthesesMatch(term, context);
1059                     return JSRegExpMatch;
1060                 }
1061
1062                 resetMatches(term, context);
1063                 freeParenthesesDisjunctionContext(context);
1064
1065                 if (result != JSRegExpNoMatch)
1066                     return result;
1067             }
1068
1069             // Nope - okay backtrack looking for an alternative.
1070             while (backTrack->matchAmount) {
1071                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1072                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1073                 if (result == JSRegExpMatch) {
1074                     // successful backtrack! we're back in the game!
1075                     if (backTrack->matchAmount) {
1076                         context = backTrack->lastContext;
1077                         recordParenthesesMatch(term, context);
1078                     }
1079                     return JSRegExpMatch;
1080                 }
1081
1082                 // pop a match off the stack
1083                 resetMatches(term, context);
1084                 popParenthesesDisjunctionContext(backTrack);
1085                 freeParenthesesDisjunctionContext(context);
1086
1087                 if (result != JSRegExpNoMatch)
1088                     return result;
1089             }
1090
1091             return JSRegExpNoMatch;
1092         }
1093         }
1094
1095         ASSERT_NOT_REACHED();
1096         return JSRegExpErrorNoMatch;
1097     }
1098
1099     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1100     {
1101         UNUSED_PARAM(term);
1102         unsigned matchBegin = context->matchBegin;
1103
1104         if (matchBegin) {
1105             for (matchBegin--; true; matchBegin--) {
1106                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1107                     ++matchBegin;
1108                     break;
1109                 }
1110
1111                 if (!matchBegin)
1112                     break;
1113             }
1114         }
1115
1116         unsigned matchEnd = input.getPos();
1117
1118         for (; (matchEnd != input.end())
1119              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1120
1121         if (((matchBegin && term.anchors.m_bol)
1122              || ((matchEnd != input.end()) && term.anchors.m_eol))
1123             && !pattern->m_multiline)
1124             return false;
1125
1126         context->matchBegin = matchBegin;
1127         context->matchEnd = matchEnd;
1128         return true;
1129     }
1130
1131 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1132 #define BACKTRACK() { --context->term; goto backtrack; }
1133 #define currentTerm() (disjunction->terms[context->term])
1134     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1135     {
1136         if (!--remainingMatchCount)
1137             return JSRegExpErrorHitLimit;
1138
1139         if (btrack)
1140             BACKTRACK();
1141
1142         context->matchBegin = input.getPos();
1143         context->term = 0;
1144
1145     matchAgain:
1146         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1147
1148         switch (currentTerm().type) {
1149         case ByteTerm::TypeSubpatternBegin:
1150             MATCH_NEXT();
1151         case ByteTerm::TypeSubpatternEnd:
1152             context->matchEnd = input.getPos();
1153             return JSRegExpMatch;
1154
1155         case ByteTerm::TypeBodyAlternativeBegin:
1156             MATCH_NEXT();
1157         case ByteTerm::TypeBodyAlternativeDisjunction:
1158         case ByteTerm::TypeBodyAlternativeEnd:
1159             context->matchEnd = input.getPos();
1160             return JSRegExpMatch;
1161
1162         case ByteTerm::TypeAlternativeBegin:
1163             MATCH_NEXT();
1164         case ByteTerm::TypeAlternativeDisjunction:
1165         case ByteTerm::TypeAlternativeEnd: {
1166             int offset = currentTerm().alternative.end;
1167             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1168             backTrack->offset = offset;
1169             context->term += offset;
1170             MATCH_NEXT();
1171         }
1172
1173         case ByteTerm::TypeAssertionBOL:
1174             if (matchAssertionBOL(currentTerm()))
1175                 MATCH_NEXT();
1176             BACKTRACK();
1177         case ByteTerm::TypeAssertionEOL:
1178             if (matchAssertionEOL(currentTerm()))
1179                 MATCH_NEXT();
1180             BACKTRACK();
1181         case ByteTerm::TypeAssertionWordBoundary:
1182             if (matchAssertionWordBoundary(currentTerm()))
1183                 MATCH_NEXT();
1184             BACKTRACK();
1185
1186         case ByteTerm::TypePatternCharacterOnce:
1187         case ByteTerm::TypePatternCharacterFixed: {
1188             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1189                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
1190                     BACKTRACK();
1191             }
1192             MATCH_NEXT();
1193         }
1194         case ByteTerm::TypePatternCharacterGreedy: {
1195             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1196             unsigned matchAmount = 0;
1197             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1198                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
1199                     input.uncheckInput(1);
1200                     break;
1201                 }
1202                 ++matchAmount;
1203             }
1204             backTrack->matchAmount = matchAmount;
1205
1206             MATCH_NEXT();
1207         }
1208         case ByteTerm::TypePatternCharacterNonGreedy: {
1209             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1210             backTrack->matchAmount = 0;
1211             MATCH_NEXT();
1212         }
1213
1214         case ByteTerm::TypePatternCasedCharacterOnce:
1215         case ByteTerm::TypePatternCasedCharacterFixed: {
1216             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1217                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
1218                     BACKTRACK();
1219             }
1220             MATCH_NEXT();
1221         }
1222         case ByteTerm::TypePatternCasedCharacterGreedy: {
1223             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1224             unsigned matchAmount = 0;
1225             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1226                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
1227                     input.uncheckInput(1);
1228                     break;
1229                 }
1230                 ++matchAmount;
1231             }
1232             backTrack->matchAmount = matchAmount;
1233
1234             MATCH_NEXT();
1235         }
1236         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1237             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1238             backTrack->matchAmount = 0;
1239             MATCH_NEXT();
1240         }
1241
1242         case ByteTerm::TypeCharacterClass:
1243             if (matchCharacterClass(currentTerm(), context))
1244                 MATCH_NEXT();
1245             BACKTRACK();
1246         case ByteTerm::TypeBackReference:
1247             if (matchBackReference(currentTerm(), context))
1248                 MATCH_NEXT();
1249             BACKTRACK();
1250         case ByteTerm::TypeParenthesesSubpattern: {
1251             JSRegExpResult result = matchParentheses(currentTerm(), context);
1252
1253             if (result == JSRegExpMatch) {
1254                 MATCH_NEXT();
1255             }  else if (result != JSRegExpNoMatch)
1256                 return result;
1257
1258             BACKTRACK();
1259         }
1260         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1261             if (matchParenthesesOnceBegin(currentTerm(), context))
1262                 MATCH_NEXT();
1263             BACKTRACK();
1264         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1265             if (matchParenthesesOnceEnd(currentTerm(), context))
1266                 MATCH_NEXT();
1267             BACKTRACK();
1268         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1269             if (matchParenthesesTerminalBegin(currentTerm(), context))
1270                 MATCH_NEXT();
1271             BACKTRACK();
1272         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1273             if (matchParenthesesTerminalEnd(currentTerm(), context))
1274                 MATCH_NEXT();
1275             BACKTRACK();
1276         case ByteTerm::TypeParentheticalAssertionBegin:
1277             if (matchParentheticalAssertionBegin(currentTerm(), context))
1278                 MATCH_NEXT();
1279             BACKTRACK();
1280         case ByteTerm::TypeParentheticalAssertionEnd:
1281             if (matchParentheticalAssertionEnd(currentTerm(), context))
1282                 MATCH_NEXT();
1283             BACKTRACK();
1284
1285         case ByteTerm::TypeCheckInput:
1286             if (input.checkInput(currentTerm().checkInputCount))
1287                 MATCH_NEXT();
1288             BACKTRACK();
1289
1290         case ByteTerm::TypeUncheckInput:
1291             input.uncheckInput(currentTerm().checkInputCount);
1292             MATCH_NEXT();
1293                 
1294         case ByteTerm::TypeDotStarEnclosure:
1295             if (matchDotStarEnclosure(currentTerm(), context))
1296                 return JSRegExpMatch;
1297             BACKTRACK();
1298         }
1299
1300         // We should never fall-through to here.
1301         ASSERT_NOT_REACHED();
1302
1303     backtrack:
1304         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1305
1306         switch (currentTerm().type) {
1307         case ByteTerm::TypeSubpatternBegin:
1308             return JSRegExpNoMatch;
1309         case ByteTerm::TypeSubpatternEnd:
1310             ASSERT_NOT_REACHED();
1311
1312         case ByteTerm::TypeBodyAlternativeBegin:
1313         case ByteTerm::TypeBodyAlternativeDisjunction: {
1314             int offset = currentTerm().alternative.next;
1315             context->term += offset;
1316             if (offset > 0)
1317                 MATCH_NEXT();
1318
1319             if (input.atEnd())
1320                 return JSRegExpNoMatch;
1321
1322             input.next();
1323
1324             context->matchBegin = input.getPos();
1325
1326             if (currentTerm().alternative.onceThrough)
1327                 context->term += currentTerm().alternative.next;
1328
1329             MATCH_NEXT();
1330         }
1331         case ByteTerm::TypeBodyAlternativeEnd:
1332             ASSERT_NOT_REACHED();
1333
1334         case ByteTerm::TypeAlternativeBegin:
1335         case ByteTerm::TypeAlternativeDisjunction: {
1336             int offset = currentTerm().alternative.next;
1337             context->term += offset;
1338             if (offset > 0)
1339                 MATCH_NEXT();
1340             BACKTRACK();
1341         }
1342         case ByteTerm::TypeAlternativeEnd: {
1343             // We should never backtrack back into an alternative of the main body of the regex.
1344             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1345             unsigned offset = backTrack->offset;
1346             context->term -= offset;
1347             BACKTRACK();
1348         }
1349
1350         case ByteTerm::TypeAssertionBOL:
1351         case ByteTerm::TypeAssertionEOL:
1352         case ByteTerm::TypeAssertionWordBoundary:
1353             BACKTRACK();
1354
1355         case ByteTerm::TypePatternCharacterOnce:
1356         case ByteTerm::TypePatternCharacterFixed:
1357         case ByteTerm::TypePatternCharacterGreedy:
1358         case ByteTerm::TypePatternCharacterNonGreedy:
1359             if (backtrackPatternCharacter(currentTerm(), context))
1360                 MATCH_NEXT();
1361             BACKTRACK();
1362         case ByteTerm::TypePatternCasedCharacterOnce:
1363         case ByteTerm::TypePatternCasedCharacterFixed:
1364         case ByteTerm::TypePatternCasedCharacterGreedy:
1365         case ByteTerm::TypePatternCasedCharacterNonGreedy:
1366             if (backtrackPatternCasedCharacter(currentTerm(), context))
1367                 MATCH_NEXT();
1368             BACKTRACK();
1369         case ByteTerm::TypeCharacterClass:
1370             if (backtrackCharacterClass(currentTerm(), context))
1371                 MATCH_NEXT();
1372             BACKTRACK();
1373         case ByteTerm::TypeBackReference:
1374             if (backtrackBackReference(currentTerm(), context))
1375                 MATCH_NEXT();
1376             BACKTRACK();
1377         case ByteTerm::TypeParenthesesSubpattern: {
1378             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1379
1380             if (result == JSRegExpMatch) {
1381                 MATCH_NEXT();
1382             } else if (result != JSRegExpNoMatch)
1383                 return result;
1384
1385             BACKTRACK();
1386         }
1387         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1388             if (backtrackParenthesesOnceBegin(currentTerm(), context))
1389                 MATCH_NEXT();
1390             BACKTRACK();
1391         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1392             if (backtrackParenthesesOnceEnd(currentTerm(), context))
1393                 MATCH_NEXT();
1394             BACKTRACK();
1395         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1396             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1397                 MATCH_NEXT();
1398             BACKTRACK();
1399         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1400             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1401                 MATCH_NEXT();
1402             BACKTRACK();
1403         case ByteTerm::TypeParentheticalAssertionBegin:
1404             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1405                 MATCH_NEXT();
1406             BACKTRACK();
1407         case ByteTerm::TypeParentheticalAssertionEnd:
1408             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1409                 MATCH_NEXT();
1410             BACKTRACK();
1411
1412         case ByteTerm::TypeCheckInput:
1413             input.uncheckInput(currentTerm().checkInputCount);
1414             BACKTRACK();
1415
1416         case ByteTerm::TypeUncheckInput:
1417             input.checkInput(currentTerm().checkInputCount);
1418             BACKTRACK();
1419
1420         case ByteTerm::TypeDotStarEnclosure:
1421             ASSERT_NOT_REACHED();
1422         }
1423
1424         ASSERT_NOT_REACHED();
1425         return JSRegExpErrorNoMatch;
1426     }
1427
1428     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1429     {
1430         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1431
1432         if (result == JSRegExpMatch) {
1433             while (context->matchBegin == context->matchEnd) {
1434                 result = matchDisjunction(disjunction, context, true);
1435                 if (result != JSRegExpMatch)
1436                     return result;
1437             }
1438             return JSRegExpMatch;
1439         }
1440
1441         return result;
1442     }
1443
1444     int interpret()
1445     {
1446         allocatorPool = pattern->m_allocator->startAllocator();
1447         if (!allocatorPool)
1448             CRASH();
1449
1450         for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
1451             output[i] = -1;
1452
1453         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1454
1455         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1456         if (result == JSRegExpMatch) {
1457             output[0] = context->matchBegin;
1458             output[1] = context->matchEnd;
1459         }
1460
1461         freeDisjunctionContext(context);
1462
1463         pattern->m_allocator->stopAllocator();
1464
1465         // RegExp.cpp currently expects all error to be converted to -1.
1466         ASSERT((result == JSRegExpMatch) == (output[0] != -1));
1467         return output[0];
1468     }
1469
1470     Interpreter(BytecodePattern* pattern, int* output, const UString input, unsigned start, unsigned length)
1471         : pattern(pattern)
1472         , output(output)
1473         , input(input, start, length)
1474         , allocatorPool(0)
1475         , remainingMatchCount(matchLimit)
1476     {
1477     }
1478
1479 private:
1480     BytecodePattern* pattern;
1481     int* output;
1482     InputStream input;
1483     BumpPointerPool* allocatorPool;
1484     unsigned remainingMatchCount;
1485 };
1486
1487
1488
1489 class ByteCompiler {
1490     struct ParenthesesStackEntry {
1491         unsigned beginTerm;
1492         unsigned savedAlternativeIndex;
1493         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1494             : beginTerm(beginTerm)
1495             , savedAlternativeIndex(savedAlternativeIndex)
1496         {
1497         }
1498     };
1499
1500 public:
1501     ByteCompiler(YarrPattern& pattern)
1502         : m_pattern(pattern)
1503     {
1504         m_currentAlternativeIndex = 0;
1505     }
1506
1507     PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1508     {
1509         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1510         emitDisjunction(m_pattern.m_body);
1511         regexEnd();
1512
1513         return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
1514     }
1515
1516     void checkInput(unsigned count)
1517     {
1518         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1519     }
1520
1521     void uncheckInput(unsigned count)
1522     {
1523         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1524     }
1525     
1526     void assertionBOL(int inputPosition)
1527     {
1528         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1529     }
1530
1531     void assertionEOL(int inputPosition)
1532     {
1533         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1534     }
1535
1536     void assertionWordBoundary(bool invert, int inputPosition)
1537     {
1538         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1539     }
1540
1541     void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1542     {
1543         if (m_pattern.m_ignoreCase) {
1544             UChar lo = Unicode::toLower(ch);
1545             UChar hi = Unicode::toUpper(ch);
1546
1547             if (lo != hi) {
1548                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1549                 return;
1550             }
1551         }
1552
1553         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1554     }
1555
1556     void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1557     {
1558         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1559
1560         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1561         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1562         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1563     }
1564
1565     void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1566     {
1567         ASSERT(subpatternId);
1568
1569         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1570
1571         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1572         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1573         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1574     }
1575
1576     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1577     {
1578         int beginTerm = m_bodyDisjunction->terms.size();
1579
1580         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1581         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1582         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1583         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1584
1585         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1586         m_currentAlternativeIndex = beginTerm + 1;
1587     }
1588
1589     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1590     {
1591         int beginTerm = m_bodyDisjunction->terms.size();
1592
1593         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1594         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1595         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1596         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1597
1598         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1599         m_currentAlternativeIndex = beginTerm + 1;
1600     }
1601
1602     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1603     {
1604         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1605         // then fix this up at the end! - simplifying this should make it much clearer.
1606         // https://bugs.webkit.org/show_bug.cgi?id=50136
1607
1608         int beginTerm = m_bodyDisjunction->terms.size();
1609
1610         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1611         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1612         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1613         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1614
1615         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1616         m_currentAlternativeIndex = beginTerm + 1;
1617     }
1618
1619     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1620     {
1621         int beginTerm = m_bodyDisjunction->terms.size();
1622
1623         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1624         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1625         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1626         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1627
1628         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1629         m_currentAlternativeIndex = beginTerm + 1;
1630     }
1631
1632     void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1633     {
1634         unsigned beginTerm = popParenthesesStack();
1635         closeAlternative(beginTerm + 1);
1636         unsigned endTerm = m_bodyDisjunction->terms.size();
1637
1638         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1639
1640         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1641         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1642
1643         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1644         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1645         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1646         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1647
1648         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1649         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1650         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1651         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1652     }
1653
1654     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1655     {
1656         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1657     }
1658
1659     unsigned popParenthesesStack()
1660     {
1661         ASSERT(m_parenthesesStack.size());
1662         int stackEnd = m_parenthesesStack.size() - 1;
1663         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1664         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1665         m_parenthesesStack.shrink(stackEnd);
1666
1667         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1668         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1669
1670         return beginTerm;
1671     }
1672
1673 #ifndef NDEBUG
1674     void dumpDisjunction(ByteDisjunction* disjunction)
1675     {
1676         printf("ByteDisjunction(%p):\n\t", disjunction);
1677         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1678             printf("{ %d } ", disjunction->terms[i].type);
1679         printf("\n");
1680     }
1681 #endif
1682
1683     void closeAlternative(int beginTerm)
1684     {
1685         int origBeginTerm = beginTerm;
1686         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1687         int endIndex = m_bodyDisjunction->terms.size();
1688
1689         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1690
1691         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1692             m_bodyDisjunction->terms.remove(beginTerm);
1693         else {
1694             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1695                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1696                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1697                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1698                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1699             }
1700
1701             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1702
1703             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1704             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1705         }
1706     }
1707
1708     void closeBodyAlternative()
1709     {
1710         int beginTerm = 0;
1711         int origBeginTerm = 0;
1712         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1713         int endIndex = m_bodyDisjunction->terms.size();
1714
1715         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1716
1717         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1718             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1719             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1720             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1721             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1722         }
1723
1724         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1725
1726         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1727         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1728     }
1729
1730     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1731     {
1732         unsigned beginTerm = popParenthesesStack();
1733         closeAlternative(beginTerm + 1);
1734         unsigned endTerm = m_bodyDisjunction->terms.size();
1735
1736         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1737
1738         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1739
1740         bool capture = parenthesesBegin.capture();
1741         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1742
1743         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1744         ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
1745
1746         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1747         for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
1748             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1749         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1750
1751         m_bodyDisjunction->terms.shrink(beginTerm);
1752
1753         m_allParenthesesInfo.append(parenthesesDisjunction);
1754         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
1755
1756         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1757         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1758         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1759     }
1760
1761     void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1762     {
1763         unsigned beginTerm = popParenthesesStack();
1764         closeAlternative(beginTerm + 1);
1765         unsigned endTerm = m_bodyDisjunction->terms.size();
1766
1767         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1768
1769         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1770         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1771
1772         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1773         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1774         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1775         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1776
1777         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1778         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1779         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1780         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1781     }
1782
1783     void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1784     {
1785         unsigned beginTerm = popParenthesesStack();
1786         closeAlternative(beginTerm + 1);
1787         unsigned endTerm = m_bodyDisjunction->terms.size();
1788
1789         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1790
1791         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1792         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1793
1794         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1795         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1796         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1797         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1798
1799         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1800         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1801         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1802         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1803     }
1804
1805     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1806     {
1807         m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1808         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1809         m_bodyDisjunction->terms[0].frameLocation = 0;
1810         m_currentAlternativeIndex = 0;
1811     }
1812
1813     void regexEnd()
1814     {
1815         closeBodyAlternative();
1816     }
1817
1818     void alternativeBodyDisjunction(bool onceThrough)
1819     {
1820         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1821         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1822         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1823
1824         m_currentAlternativeIndex = newAlternativeIndex;
1825     }
1826
1827     void alternativeDisjunction()
1828     {
1829         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1830         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1831         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1832
1833         m_currentAlternativeIndex = newAlternativeIndex;
1834     }
1835
1836     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1837     {
1838         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1839             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1840
1841             PatternAlternative* alternative = disjunction->m_alternatives[alt];
1842
1843             if (alt) {
1844                 if (disjunction == m_pattern.m_body)
1845                     alternativeBodyDisjunction(alternative->onceThrough());
1846                 else
1847                     alternativeDisjunction();
1848             }
1849
1850             unsigned minimumSize = alternative->m_minimumSize;
1851             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1852             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1853
1854             if (countToCheck) {
1855                 checkInput(countToCheck);
1856                 currentCountAlreadyChecked += countToCheck;
1857             }
1858
1859             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1860                 PatternTerm& term = alternative->m_terms[i];
1861
1862                 switch (term.type) {
1863                 case PatternTerm::TypeAssertionBOL:
1864                     assertionBOL(term.inputPosition - currentCountAlreadyChecked);
1865                     break;
1866
1867                 case PatternTerm::TypeAssertionEOL:
1868                     assertionEOL(term.inputPosition - currentCountAlreadyChecked);
1869                     break;
1870
1871                 case PatternTerm::TypeAssertionWordBoundary:
1872                     assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked);
1873                     break;
1874
1875                 case PatternTerm::TypePatternCharacter:
1876                     atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1877                     break;
1878
1879                 case PatternTerm::TypeCharacterClass:
1880                     atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1881                     break;
1882
1883                 case PatternTerm::TypeBackReference:
1884                     atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1885                         break;
1886
1887                 case PatternTerm::TypeForwardReference:
1888                     break;
1889
1890                 case PatternTerm::TypeParenthesesSubpattern: {
1891                     unsigned disjunctionAlreadyCheckedCount = 0;
1892                     if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1893                         unsigned alternativeFrameLocation = term.frameLocation;
1894                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1895                         if (term.quantityType == QuantifierFixedCount)
1896                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1897                         else
1898                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1899                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1900                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
1901                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1902                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1903                     } else if (term.parentheses.isTerminal) {
1904                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1905                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1906                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1907                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1908                     } else {
1909                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1910                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
1911                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1912                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1913                     }
1914                     break;
1915                 }
1916
1917                 case PatternTerm::TypeParentheticalAssertion: {
1918                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1919
1920                     ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1921                     unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1922                     unsigned uncheckAmount = 0;
1923                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1924                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1925                         uncheckInput(uncheckAmount);
1926                         currentCountAlreadyChecked -= uncheckAmount;
1927                     }
1928
1929                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1930                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1931                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1932                     if (uncheckAmount) {
1933                         checkInput(uncheckAmount);
1934                         currentCountAlreadyChecked += uncheckAmount;
1935                     }
1936                     break;
1937                 }
1938
1939                 case PatternTerm::TypeDotStarEnclosure:
1940                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
1941                     break;
1942                 }
1943             }
1944         }
1945     }
1946
1947 private:
1948     YarrPattern& m_pattern;
1949     OwnPtr<ByteDisjunction> m_bodyDisjunction;
1950     unsigned m_currentAlternativeIndex;
1951     Vector<ParenthesesStackEntry> m_parenthesesStack;
1952     Vector<ByteDisjunction*> m_allParenthesesInfo;
1953 };
1954
1955 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1956 {
1957     return ByteCompiler(pattern).compile(allocator);
1958 }
1959
1960 int interpret(BytecodePattern* bytecode, const UString& input, unsigned start, unsigned length, int* output)
1961 {
1962     return Interpreter(bytecode, output, input, start, length).interpret();
1963 }
1964
1965 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1966 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1967 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1968 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1969 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1970 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1971 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1972
1973
1974 } }