updating changelog for release
[profile/ivi/webkit-efl.git] / Source / JavaScriptCore / yarr / YarrInterpreter.cpp
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "YarrInterpreter.h"
29
30 #include "UString.h"
31 #include "Yarr.h"
32 #include "YarrCanonicalizeUCS2.h"
33 #include <wtf/BumpPointerAllocator.h>
34 #include <wtf/DataLog.h>
35 #include <wtf/text/CString.h>
36
37 #ifndef NDEBUG
38 #include <stdio.h>
39 #endif
40
41 using namespace WTF;
42
43 namespace JSC { namespace Yarr {
44
45 template<typename CharType>
46 class Interpreter {
47 public:
48     struct ParenthesesDisjunctionContext;
49
50     struct BackTrackInfoPatternCharacter {
51         uintptr_t matchAmount;
52     };
53     struct BackTrackInfoCharacterClass {
54         uintptr_t matchAmount;
55     };
56     struct BackTrackInfoBackReference {
57         uintptr_t begin; // Not really needed for greedy quantifiers.
58         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
59     };
60     struct BackTrackInfoAlternative {
61         uintptr_t offset;
62     };
63     struct BackTrackInfoParentheticalAssertion {
64         uintptr_t begin;
65     };
66     struct BackTrackInfoParenthesesOnce {
67         uintptr_t begin;
68     };
69     struct BackTrackInfoParenthesesTerminal {
70         uintptr_t begin;
71     };
72     struct BackTrackInfoParentheses {
73         uintptr_t matchAmount;
74         ParenthesesDisjunctionContext* lastContext;
75     };
76
77     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
78     {
79         context->next = backTrack->lastContext;
80         backTrack->lastContext = context;
81         ++backTrack->matchAmount;
82     }
83
84     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
85     {
86         ASSERT(backTrack->matchAmount);
87         ASSERT(backTrack->lastContext);
88         backTrack->lastContext = backTrack->lastContext->next;
89         --backTrack->matchAmount;
90     }
91
92     struct DisjunctionContext
93     {
94         DisjunctionContext()
95             : term(0)
96         {
97         }
98
99         void* operator new(size_t, void* where)
100         {
101             return where;
102         }
103
104         int term;
105         unsigned matchBegin;
106         unsigned matchEnd;
107         uintptr_t frame[1];
108     };
109
110     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
111     {
112         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
113         allocatorPool = allocatorPool->ensureCapacity(size);
114         if (!allocatorPool)
115             CRASH();
116         return new (allocatorPool->alloc(size)) DisjunctionContext();
117     }
118
119     void freeDisjunctionContext(DisjunctionContext* context)
120     {
121         allocatorPool = allocatorPool->dealloc(context);
122     }
123
124     struct ParenthesesDisjunctionContext
125     {
126         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
127             : next(0)
128         {
129             unsigned firstSubpatternId = term.atom.subpatternId;
130             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
131
132             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
133                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
134                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
135             }
136
137             new (getDisjunctionContext(term)) DisjunctionContext();
138         }
139
140         void* operator new(size_t, void* where)
141         {
142             return where;
143         }
144
145         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
146         {
147             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
148                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
149         }
150
151         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
152         {
153             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
154         }
155
156         ParenthesesDisjunctionContext* next;
157         unsigned subpatternBackup[1];
158     };
159
160     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
161     {
162         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
163         allocatorPool = allocatorPool->ensureCapacity(size);
164         if (!allocatorPool)
165             CRASH();
166         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
167     }
168
169     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
170     {
171         allocatorPool = allocatorPool->dealloc(context);
172     }
173
174     class InputStream {
175     public:
176         InputStream(const CharType* input, unsigned start, unsigned length)
177             : input(input)
178             , pos(start)
179             , length(length)
180         {
181         }
182
183         void next()
184         {
185             ++pos;
186         }
187
188         void rewind(unsigned amount)
189         {
190             ASSERT(pos >= amount);
191             pos -= amount;
192         }
193
194         int read()
195         {
196             ASSERT(pos < length);
197             if (pos < length)
198                 return input[pos];
199             return -1;
200         }
201
202         int readPair()
203         {
204             ASSERT(pos + 1 < length);
205             return input[pos] | input[pos + 1] << 16;
206         }
207
208         int readChecked(unsigned negativePositionOffest)
209         {
210             if (pos < negativePositionOffest)
211                 CRASH();
212             unsigned p = pos - negativePositionOffest;
213             ASSERT(p < length);
214             return input[p];
215         }
216
217         int reread(unsigned from)
218         {
219             ASSERT(from < length);
220             return input[from];
221         }
222
223         int prev()
224         {
225             ASSERT(!(pos > length));
226             if (pos && length)
227                 return input[pos - 1];
228             return -1;
229         }
230
231         unsigned getPos()
232         {
233             return pos;
234         }
235
236         void setPos(unsigned p)
237         {
238             pos = p;
239         }
240
241         bool atStart()
242         {
243             return pos == 0;
244         }
245
246         bool atEnd()
247         {
248             return pos == length;
249         }
250
251         unsigned end()
252         {
253             return length;
254         }
255
256         bool checkInput(unsigned count)
257         {
258             if (((pos + count) <= length) && ((pos + count) >= pos)) {
259                 pos += count;
260                 return true;
261             }
262             return false;
263         }
264
265         void uncheckInput(unsigned count)
266         {
267             if (pos < count)
268                 CRASH();
269             pos -= count;
270         }
271
272         bool atStart(unsigned negativePositionOffest)
273         {
274             return pos == negativePositionOffest;
275         }
276
277         bool atEnd(unsigned negativePositionOffest)
278         {
279             if (pos < negativePositionOffest)
280                 CRASH();
281             return (pos - negativePositionOffest) == length;
282         }
283
284         bool isAvailableInput(unsigned offset)
285         {
286             return (((pos + offset) <= length) && ((pos + offset) >= pos));
287         }
288
289     private:
290         const CharType* input;
291         unsigned pos;
292         unsigned length;
293     };
294
295     bool testCharacterClass(CharacterClass* characterClass, int ch)
296     {
297         if (ch & 0xFF80) {
298             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
299                 if (ch == characterClass->m_matchesUnicode[i])
300                     return true;
301             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
302                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
303                     return true;
304         } else {
305             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
306                 if (ch == characterClass->m_matches[i])
307                     return true;
308             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
309                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
310                     return true;
311         }
312
313         return false;
314     }
315
316     bool checkCharacter(int testChar, unsigned negativeInputOffset)
317     {
318         return testChar == input.readChecked(negativeInputOffset);
319     }
320
321     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
322     {
323         int ch = input.readChecked(negativeInputOffset);
324         return (loChar == ch) || (hiChar == ch);
325     }
326
327     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
328     {
329         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
330         return invert ? !match : match;
331     }
332
333     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
334     {
335         unsigned matchSize = (unsigned)(matchEnd - matchBegin);
336
337         if (!input.checkInput(matchSize))
338             return false;
339
340         if (pattern->m_ignoreCase) {
341             for (unsigned i = 0; i < matchSize; ++i) {
342                 int oldCh = input.reread(matchBegin + i);
343                 int ch = input.readChecked(negativeInputOffset + matchSize - i);
344
345                 if (oldCh == ch)
346                     continue;
347
348                 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
349                 // unicode values are never allowed to match against ascii ones.
350                 if (isASCII(oldCh) || isASCII(ch)) {
351                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
352                         continue;
353                 } else if (areCanonicallyEquivalent(oldCh, ch))
354                     continue;
355
356                 input.uncheckInput(matchSize);
357                 return false;
358             }
359         } else {
360             for (unsigned i = 0; i < matchSize; ++i) {
361                 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
362                     input.uncheckInput(matchSize);
363                     return false;
364                 }
365             }
366         }
367
368         return true;
369     }
370
371     bool matchAssertionBOL(ByteTerm& term)
372     {
373         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
374     }
375
376     bool matchAssertionEOL(ByteTerm& term)
377     {
378         if (term.inputPosition)
379             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
380
381         return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
382     }
383
384     bool matchAssertionWordBoundary(ByteTerm& term)
385     {
386         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
387         bool readIsWordchar;
388         if (term.inputPosition)
389             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
390         else
391             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
392
393         bool wordBoundary = prevIsWordchar != readIsWordchar;
394         return term.invert() ? !wordBoundary : wordBoundary;
395     }
396
397     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
398     {
399         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
400
401         switch (term.atom.quantityType) {
402         case QuantifierFixedCount:
403             break;
404
405         case QuantifierGreedy:
406             if (backTrack->matchAmount) {
407                 --backTrack->matchAmount;
408                 input.uncheckInput(1);
409                 return true;
410             }
411             break;
412
413         case QuantifierNonGreedy:
414             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
415                 ++backTrack->matchAmount;
416                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
417                     return true;
418             }
419             input.uncheckInput(backTrack->matchAmount);
420             break;
421         }
422
423         return false;
424     }
425
426     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
427     {
428         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
429
430         switch (term.atom.quantityType) {
431         case QuantifierFixedCount:
432             break;
433
434         case QuantifierGreedy:
435             if (backTrack->matchAmount) {
436                 --backTrack->matchAmount;
437                 input.uncheckInput(1);
438                 return true;
439             }
440             break;
441
442         case QuantifierNonGreedy:
443             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
444                 ++backTrack->matchAmount;
445                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
446                     return true;
447             }
448             input.uncheckInput(backTrack->matchAmount);
449             break;
450         }
451
452         return false;
453     }
454
455     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
456     {
457         ASSERT(term.type == ByteTerm::TypeCharacterClass);
458         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
459
460         switch (term.atom.quantityType) {
461         case QuantifierFixedCount: {
462             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
463                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
464                     return false;
465             }
466             return true;
467         }
468
469         case QuantifierGreedy: {
470             unsigned matchAmount = 0;
471             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
472                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
473                     input.uncheckInput(1);
474                     break;
475                 }
476                 ++matchAmount;
477             }
478             backTrack->matchAmount = matchAmount;
479
480             return true;
481         }
482
483         case QuantifierNonGreedy:
484             backTrack->matchAmount = 0;
485             return true;
486         }
487
488         ASSERT_NOT_REACHED();
489         return false;
490     }
491
492     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
493     {
494         ASSERT(term.type == ByteTerm::TypeCharacterClass);
495         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
496
497         switch (term.atom.quantityType) {
498         case QuantifierFixedCount:
499             break;
500
501         case QuantifierGreedy:
502             if (backTrack->matchAmount) {
503                 --backTrack->matchAmount;
504                 input.uncheckInput(1);
505                 return true;
506             }
507             break;
508
509         case QuantifierNonGreedy:
510             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
511                 ++backTrack->matchAmount;
512                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
513                     return true;
514             }
515             input.uncheckInput(backTrack->matchAmount);
516             break;
517         }
518
519         return false;
520     }
521
522     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
523     {
524         ASSERT(term.type == ByteTerm::TypeBackReference);
525         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
526
527         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
528         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
529
530         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
531         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
532         // Eg.: /(a\1)/
533         if (matchEnd == offsetNoMatch)
534             return true;
535
536         if (matchBegin == offsetNoMatch)
537             return true;
538
539         ASSERT(matchBegin <= matchEnd);
540
541         if (matchBegin == matchEnd)
542             return true;
543
544         switch (term.atom.quantityType) {
545         case QuantifierFixedCount: {
546             backTrack->begin = input.getPos();
547             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
548                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
549                     input.setPos(backTrack->begin);
550                     return false;
551                 }
552             }
553             return true;
554         }
555
556         case QuantifierGreedy: {
557             unsigned matchAmount = 0;
558             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
559                 ++matchAmount;
560             backTrack->matchAmount = matchAmount;
561             return true;
562         }
563
564         case QuantifierNonGreedy:
565             backTrack->begin = input.getPos();
566             backTrack->matchAmount = 0;
567             return true;
568         }
569
570         ASSERT_NOT_REACHED();
571         return false;
572     }
573
574     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
575     {
576         ASSERT(term.type == ByteTerm::TypeBackReference);
577         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
578
579         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
580         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
581
582         if (matchBegin == offsetNoMatch)
583             return false;
584
585         ASSERT(matchBegin <= matchEnd);
586
587         if (matchBegin == matchEnd)
588             return false;
589
590         switch (term.atom.quantityType) {
591         case QuantifierFixedCount:
592             // for quantityCount == 1, could rewind.
593             input.setPos(backTrack->begin);
594             break;
595
596         case QuantifierGreedy:
597             if (backTrack->matchAmount) {
598                 --backTrack->matchAmount;
599                 input.rewind(matchEnd - matchBegin);
600                 return true;
601             }
602             break;
603
604         case QuantifierNonGreedy:
605             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
606                 ++backTrack->matchAmount;
607                 return true;
608             }
609             input.setPos(backTrack->begin);
610             break;
611         }
612
613         return false;
614     }
615
616     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
617     {
618         if (term.capture()) {
619             unsigned subpatternId = term.atom.subpatternId;
620             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
621             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
622         }
623     }
624     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
625     {
626         unsigned firstSubpatternId = term.atom.subpatternId;
627         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
628         context->restoreOutput(output, firstSubpatternId, count);
629     }
630     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
631     {
632         while (backTrack->matchAmount) {
633             ParenthesesDisjunctionContext* context = backTrack->lastContext;
634
635             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
636             if (result == JSRegExpMatch)
637                 return JSRegExpMatch;
638
639             resetMatches(term, context);
640             popParenthesesDisjunctionContext(backTrack);
641             freeParenthesesDisjunctionContext(context);
642
643             if (result != JSRegExpNoMatch)
644                 return result;
645         }
646
647         return JSRegExpNoMatch;
648     }
649
650     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
651     {
652         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
653         ASSERT(term.atom.quantityCount == 1);
654
655         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
656
657         switch (term.atom.quantityType) {
658         case QuantifierGreedy: {
659             // set this speculatively; if we get to the parens end this will be true.
660             backTrack->begin = input.getPos();
661             break;
662         }
663         case QuantifierNonGreedy: {
664             backTrack->begin = notFound;
665             context->term += term.atom.parenthesesWidth;
666             return true;
667         }
668         case QuantifierFixedCount:
669             break;
670         }
671
672         if (term.capture()) {
673             unsigned subpatternId = term.atom.subpatternId;
674             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
675         }
676
677         return true;
678     }
679
680     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
681     {
682         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
683         ASSERT(term.atom.quantityCount == 1);
684
685         if (term.capture()) {
686             unsigned subpatternId = term.atom.subpatternId;
687             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
688         }
689
690         if (term.atom.quantityType == QuantifierFixedCount)
691             return true;
692
693         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
694         return backTrack->begin != input.getPos();
695     }
696
697     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
698     {
699         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
700         ASSERT(term.atom.quantityCount == 1);
701
702         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
703
704         if (term.capture()) {
705             unsigned subpatternId = term.atom.subpatternId;
706             output[(subpatternId << 1)] = offsetNoMatch;
707             output[(subpatternId << 1) + 1] = offsetNoMatch;
708         }
709
710         switch (term.atom.quantityType) {
711         case QuantifierGreedy:
712             // if we backtrack to this point, there is another chance - try matching nothing.
713             ASSERT(backTrack->begin != notFound);
714             backTrack->begin = notFound;
715             context->term += term.atom.parenthesesWidth;
716             return true;
717         case QuantifierNonGreedy:
718             ASSERT(backTrack->begin != notFound);
719         case QuantifierFixedCount:
720             break;
721         }
722
723         return false;
724     }
725
726     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
727     {
728         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
729         ASSERT(term.atom.quantityCount == 1);
730
731         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
732
733         switch (term.atom.quantityType) {
734         case QuantifierGreedy:
735             if (backTrack->begin == notFound) {
736                 context->term -= term.atom.parenthesesWidth;
737                 return false;
738             }
739         case QuantifierNonGreedy:
740             if (backTrack->begin == notFound) {
741                 backTrack->begin = input.getPos();
742                 if (term.capture()) {
743                     // Technically this access to inputPosition should be accessing the begin term's
744                     // inputPosition, but for repeats other than fixed these values should be
745                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
746                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
747                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
748                     unsigned subpatternId = term.atom.subpatternId;
749                     output[subpatternId << 1] = input.getPos() + term.inputPosition;
750                 }
751                 context->term -= term.atom.parenthesesWidth;
752                 return true;
753             }
754         case QuantifierFixedCount:
755             break;
756         }
757
758         return false;
759     }
760
761     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
762     {
763         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
764         ASSERT(term.atom.quantityType == QuantifierGreedy);
765         ASSERT(term.atom.quantityCount == quantifyInfinite);
766         ASSERT(!term.capture());
767
768         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
769         backTrack->begin = input.getPos();
770         return true;
771     }
772
773     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
774     {
775         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
776
777         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
778         // Empty match is a failed match.
779         if (backTrack->begin == input.getPos())
780             return false;
781
782         // Successful match! Okay, what's next? - loop around and try to match moar!
783         context->term -= (term.atom.parenthesesWidth + 1);
784         return true;
785     }
786
787     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
788     {
789         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
790         ASSERT(term.atom.quantityType == QuantifierGreedy);
791         ASSERT(term.atom.quantityCount == quantifyInfinite);
792         ASSERT(!term.capture());
793
794         // If we backtrack to this point, we have failed to match this iteration of the parens.
795         // Since this is greedy / zero minimum a failed is also accepted as a match!
796         context->term += term.atom.parenthesesWidth;
797         return true;
798     }
799
800     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
801     {
802         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
803         // should always be returned as a successful match - we should never backtrack to here.
804         ASSERT_NOT_REACHED();
805         return false;
806     }
807
808     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
809     {
810         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
811         ASSERT(term.atom.quantityCount == 1);
812
813         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
814
815         backTrack->begin = input.getPos();
816         return true;
817     }
818
819     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
820     {
821         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
822         ASSERT(term.atom.quantityCount == 1);
823
824         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
825
826         input.setPos(backTrack->begin);
827
828         // We've reached the end of the parens; if they are inverted, this is failure.
829         if (term.invert()) {
830             context->term -= term.atom.parenthesesWidth;
831             return false;
832         }
833
834         return true;
835     }
836
837     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
838     {
839         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
840         ASSERT(term.atom.quantityCount == 1);
841
842         // We've failed to match parens; if they are inverted, this is win!
843         if (term.invert()) {
844             context->term += term.atom.parenthesesWidth;
845             return true;
846         }
847
848         return false;
849     }
850
851     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
852     {
853         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
854         ASSERT(term.atom.quantityCount == 1);
855
856         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
857
858         input.setPos(backTrack->begin);
859
860         context->term -= term.atom.parenthesesWidth;
861         return false;
862     }
863
864     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
865     {
866         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
867
868         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
869         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
870
871         backTrack->matchAmount = 0;
872         backTrack->lastContext = 0;
873
874         switch (term.atom.quantityType) {
875         case QuantifierFixedCount: {
876             // While we haven't yet reached our fixed limit,
877             while (backTrack->matchAmount < term.atom.quantityCount) {
878                 // Try to do a match, and it it succeeds, add it to the list.
879                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
880                 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
881                 if (result == JSRegExpMatch)
882                     appendParenthesesDisjunctionContext(backTrack, context);
883                 else {
884                     // The match failed; try to find an alternate point to carry on from.
885                     resetMatches(term, context);
886                     freeParenthesesDisjunctionContext(context);
887
888                     if (result != JSRegExpNoMatch)
889                         return result;
890                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
891                     if (backtrackResult != JSRegExpMatch)
892                         return backtrackResult;
893                 }
894             }
895
896             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
897             ParenthesesDisjunctionContext* context = backTrack->lastContext;
898             recordParenthesesMatch(term, context);
899             return JSRegExpMatch;
900         }
901
902         case QuantifierGreedy: {
903             while (backTrack->matchAmount < term.atom.quantityCount) {
904                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
905                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
906                 if (result == JSRegExpMatch)
907                     appendParenthesesDisjunctionContext(backTrack, context);
908                 else {
909                     resetMatches(term, context);
910                     freeParenthesesDisjunctionContext(context);
911
912                     if (result != JSRegExpNoMatch)
913                         return result;
914
915                     break;
916                 }
917             }
918
919             if (backTrack->matchAmount) {
920                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
921                 recordParenthesesMatch(term, context);
922             }
923             return JSRegExpMatch;
924         }
925
926         case QuantifierNonGreedy:
927             return JSRegExpMatch;
928         }
929
930         ASSERT_NOT_REACHED();
931         return JSRegExpErrorNoMatch;
932     }
933
934     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
935     //
936     // Greedy matches never should try just adding more - you should already have done
937     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
938     // you backtrack an item off the list needs checking, since we'll never have matched
939     // the one less case.  Tracking forwards, still add as much as possible.
940     //
941     // Non-greedy, we've already done the one less case, so don't match on popping.
942     // We haven't done the one more case, so always try to add that.
943     //
944     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
945     {
946         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
947
948         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
949         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
950
951         switch (term.atom.quantityType) {
952         case QuantifierFixedCount: {
953             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
954
955             ParenthesesDisjunctionContext* context = 0;
956             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
957
958             if (result != JSRegExpMatch)
959                 return result;
960
961             // While we haven't yet reached our fixed limit,
962             while (backTrack->matchAmount < term.atom.quantityCount) {
963                 // Try to do a match, and it it succeeds, add it to the list.
964                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
965                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
966
967                 if (result == JSRegExpMatch)
968                     appendParenthesesDisjunctionContext(backTrack, context);
969                 else {
970                     // The match failed; try to find an alternate point to carry on from.
971                     resetMatches(term, context);
972                     freeParenthesesDisjunctionContext(context);
973
974                     if (result != JSRegExpNoMatch)
975                         return result;
976                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
977                     if (backtrackResult != JSRegExpMatch)
978                         return backtrackResult;
979                 }
980             }
981
982             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
983             context = backTrack->lastContext;
984             recordParenthesesMatch(term, context);
985             return JSRegExpMatch;
986         }
987
988         case QuantifierGreedy: {
989             if (!backTrack->matchAmount)
990                 return JSRegExpNoMatch;
991
992             ParenthesesDisjunctionContext* context = backTrack->lastContext;
993             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
994             if (result == JSRegExpMatch) {
995                 while (backTrack->matchAmount < term.atom.quantityCount) {
996                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
997                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
998                     if (parenthesesResult == JSRegExpMatch)
999                         appendParenthesesDisjunctionContext(backTrack, context);
1000                     else {
1001                         resetMatches(term, context);
1002                         freeParenthesesDisjunctionContext(context);
1003
1004                         if (parenthesesResult != JSRegExpNoMatch)
1005                             return parenthesesResult;
1006
1007                         break;
1008                     }
1009                 }
1010             } else {
1011                 resetMatches(term, context);
1012                 popParenthesesDisjunctionContext(backTrack);
1013                 freeParenthesesDisjunctionContext(context);
1014
1015                 if (result != JSRegExpNoMatch)
1016                     return result;
1017             }
1018
1019             if (backTrack->matchAmount) {
1020                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1021                 recordParenthesesMatch(term, context);
1022             }
1023             return JSRegExpMatch;
1024         }
1025
1026         case QuantifierNonGreedy: {
1027             // If we've not reached the limit, try to add one more match.
1028             if (backTrack->matchAmount < term.atom.quantityCount) {
1029                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1030                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1031                 if (result == JSRegExpMatch) {
1032                     appendParenthesesDisjunctionContext(backTrack, context);
1033                     recordParenthesesMatch(term, context);
1034                     return JSRegExpMatch;
1035                 }
1036
1037                 resetMatches(term, context);
1038                 freeParenthesesDisjunctionContext(context);
1039
1040                 if (result != JSRegExpNoMatch)
1041                     return result;
1042             }
1043
1044             // Nope - okay backtrack looking for an alternative.
1045             while (backTrack->matchAmount) {
1046                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1047                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1048                 if (result == JSRegExpMatch) {
1049                     // successful backtrack! we're back in the game!
1050                     if (backTrack->matchAmount) {
1051                         context = backTrack->lastContext;
1052                         recordParenthesesMatch(term, context);
1053                     }
1054                     return JSRegExpMatch;
1055                 }
1056
1057                 // pop a match off the stack
1058                 resetMatches(term, context);
1059                 popParenthesesDisjunctionContext(backTrack);
1060                 freeParenthesesDisjunctionContext(context);
1061
1062                 if (result != JSRegExpNoMatch)
1063                     return result;
1064             }
1065
1066             return JSRegExpNoMatch;
1067         }
1068         }
1069
1070         ASSERT_NOT_REACHED();
1071         return JSRegExpErrorNoMatch;
1072     }
1073
1074     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1075     {
1076         UNUSED_PARAM(term);
1077         unsigned matchBegin = context->matchBegin;
1078
1079         if (matchBegin) {
1080             for (matchBegin--; true; matchBegin--) {
1081                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1082                     ++matchBegin;
1083                     break;
1084                 }
1085
1086                 if (!matchBegin)
1087                     break;
1088             }
1089         }
1090
1091         unsigned matchEnd = input.getPos();
1092
1093         for (; (matchEnd != input.end())
1094              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1095
1096         if (((matchBegin && term.anchors.m_bol)
1097              || ((matchEnd != input.end()) && term.anchors.m_eol))
1098             && !pattern->m_multiline)
1099             return false;
1100
1101         context->matchBegin = matchBegin;
1102         context->matchEnd = matchEnd;
1103         return true;
1104     }
1105
1106 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1107 #define BACKTRACK() { --context->term; goto backtrack; }
1108 #define currentTerm() (disjunction->terms[context->term])
1109     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1110     {
1111         if (!--remainingMatchCount)
1112             return JSRegExpErrorHitLimit;
1113
1114         if (btrack)
1115             BACKTRACK();
1116
1117         context->matchBegin = input.getPos();
1118         context->term = 0;
1119
1120     matchAgain:
1121         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1122
1123         switch (currentTerm().type) {
1124         case ByteTerm::TypeSubpatternBegin:
1125             MATCH_NEXT();
1126         case ByteTerm::TypeSubpatternEnd:
1127             context->matchEnd = input.getPos();
1128             return JSRegExpMatch;
1129
1130         case ByteTerm::TypeBodyAlternativeBegin:
1131             MATCH_NEXT();
1132         case ByteTerm::TypeBodyAlternativeDisjunction:
1133         case ByteTerm::TypeBodyAlternativeEnd:
1134             context->matchEnd = input.getPos();
1135             return JSRegExpMatch;
1136
1137         case ByteTerm::TypeAlternativeBegin:
1138             MATCH_NEXT();
1139         case ByteTerm::TypeAlternativeDisjunction:
1140         case ByteTerm::TypeAlternativeEnd: {
1141             int offset = currentTerm().alternative.end;
1142             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1143             backTrack->offset = offset;
1144             context->term += offset;
1145             MATCH_NEXT();
1146         }
1147
1148         case ByteTerm::TypeAssertionBOL:
1149             if (matchAssertionBOL(currentTerm()))
1150                 MATCH_NEXT();
1151             BACKTRACK();
1152         case ByteTerm::TypeAssertionEOL:
1153             if (matchAssertionEOL(currentTerm()))
1154                 MATCH_NEXT();
1155             BACKTRACK();
1156         case ByteTerm::TypeAssertionWordBoundary:
1157             if (matchAssertionWordBoundary(currentTerm()))
1158                 MATCH_NEXT();
1159             BACKTRACK();
1160
1161         case ByteTerm::TypePatternCharacterOnce:
1162         case ByteTerm::TypePatternCharacterFixed: {
1163             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1164                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
1165                     BACKTRACK();
1166             }
1167             MATCH_NEXT();
1168         }
1169         case ByteTerm::TypePatternCharacterGreedy: {
1170             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1171             unsigned matchAmount = 0;
1172             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1173                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1174                     input.uncheckInput(1);
1175                     break;
1176                 }
1177                 ++matchAmount;
1178             }
1179             backTrack->matchAmount = matchAmount;
1180
1181             MATCH_NEXT();
1182         }
1183         case ByteTerm::TypePatternCharacterNonGreedy: {
1184             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1185             backTrack->matchAmount = 0;
1186             MATCH_NEXT();
1187         }
1188
1189         case ByteTerm::TypePatternCasedCharacterOnce:
1190         case ByteTerm::TypePatternCasedCharacterFixed: {
1191             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1192                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1193                     BACKTRACK();
1194             }
1195             MATCH_NEXT();
1196         }
1197         case ByteTerm::TypePatternCasedCharacterGreedy: {
1198             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1199             unsigned matchAmount = 0;
1200             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1201                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1202                     input.uncheckInput(1);
1203                     break;
1204                 }
1205                 ++matchAmount;
1206             }
1207             backTrack->matchAmount = matchAmount;
1208
1209             MATCH_NEXT();
1210         }
1211         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1212             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1213             backTrack->matchAmount = 0;
1214             MATCH_NEXT();
1215         }
1216
1217         case ByteTerm::TypeCharacterClass:
1218             if (matchCharacterClass(currentTerm(), context))
1219                 MATCH_NEXT();
1220             BACKTRACK();
1221         case ByteTerm::TypeBackReference:
1222             if (matchBackReference(currentTerm(), context))
1223                 MATCH_NEXT();
1224             BACKTRACK();
1225         case ByteTerm::TypeParenthesesSubpattern: {
1226             JSRegExpResult result = matchParentheses(currentTerm(), context);
1227
1228             if (result == JSRegExpMatch) {
1229                 MATCH_NEXT();
1230             }  else if (result != JSRegExpNoMatch)
1231                 return result;
1232
1233             BACKTRACK();
1234         }
1235         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1236             if (matchParenthesesOnceBegin(currentTerm(), context))
1237                 MATCH_NEXT();
1238             BACKTRACK();
1239         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1240             if (matchParenthesesOnceEnd(currentTerm(), context))
1241                 MATCH_NEXT();
1242             BACKTRACK();
1243         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1244             if (matchParenthesesTerminalBegin(currentTerm(), context))
1245                 MATCH_NEXT();
1246             BACKTRACK();
1247         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1248             if (matchParenthesesTerminalEnd(currentTerm(), context))
1249                 MATCH_NEXT();
1250             BACKTRACK();
1251         case ByteTerm::TypeParentheticalAssertionBegin:
1252             if (matchParentheticalAssertionBegin(currentTerm(), context))
1253                 MATCH_NEXT();
1254             BACKTRACK();
1255         case ByteTerm::TypeParentheticalAssertionEnd:
1256             if (matchParentheticalAssertionEnd(currentTerm(), context))
1257                 MATCH_NEXT();
1258             BACKTRACK();
1259
1260         case ByteTerm::TypeCheckInput:
1261             if (input.checkInput(currentTerm().checkInputCount))
1262                 MATCH_NEXT();
1263             BACKTRACK();
1264
1265         case ByteTerm::TypeUncheckInput:
1266             input.uncheckInput(currentTerm().checkInputCount);
1267             MATCH_NEXT();
1268                 
1269         case ByteTerm::TypeDotStarEnclosure:
1270             if (matchDotStarEnclosure(currentTerm(), context))
1271                 return JSRegExpMatch;
1272             BACKTRACK();
1273         }
1274
1275         // We should never fall-through to here.
1276         ASSERT_NOT_REACHED();
1277
1278     backtrack:
1279         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1280
1281         switch (currentTerm().type) {
1282         case ByteTerm::TypeSubpatternBegin:
1283             return JSRegExpNoMatch;
1284         case ByteTerm::TypeSubpatternEnd:
1285             ASSERT_NOT_REACHED();
1286
1287         case ByteTerm::TypeBodyAlternativeBegin:
1288         case ByteTerm::TypeBodyAlternativeDisjunction: {
1289             int offset = currentTerm().alternative.next;
1290             context->term += offset;
1291             if (offset > 0)
1292                 MATCH_NEXT();
1293
1294             if (input.atEnd())
1295                 return JSRegExpNoMatch;
1296
1297             input.next();
1298
1299             context->matchBegin = input.getPos();
1300
1301             if (currentTerm().alternative.onceThrough)
1302                 context->term += currentTerm().alternative.next;
1303
1304             MATCH_NEXT();
1305         }
1306         case ByteTerm::TypeBodyAlternativeEnd:
1307             ASSERT_NOT_REACHED();
1308
1309         case ByteTerm::TypeAlternativeBegin:
1310         case ByteTerm::TypeAlternativeDisjunction: {
1311             int offset = currentTerm().alternative.next;
1312             context->term += offset;
1313             if (offset > 0)
1314                 MATCH_NEXT();
1315             BACKTRACK();
1316         }
1317         case ByteTerm::TypeAlternativeEnd: {
1318             // We should never backtrack back into an alternative of the main body of the regex.
1319             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1320             unsigned offset = backTrack->offset;
1321             context->term -= offset;
1322             BACKTRACK();
1323         }
1324
1325         case ByteTerm::TypeAssertionBOL:
1326         case ByteTerm::TypeAssertionEOL:
1327         case ByteTerm::TypeAssertionWordBoundary:
1328             BACKTRACK();
1329
1330         case ByteTerm::TypePatternCharacterOnce:
1331         case ByteTerm::TypePatternCharacterFixed:
1332         case ByteTerm::TypePatternCharacterGreedy:
1333         case ByteTerm::TypePatternCharacterNonGreedy:
1334             if (backtrackPatternCharacter(currentTerm(), context))
1335                 MATCH_NEXT();
1336             BACKTRACK();
1337         case ByteTerm::TypePatternCasedCharacterOnce:
1338         case ByteTerm::TypePatternCasedCharacterFixed:
1339         case ByteTerm::TypePatternCasedCharacterGreedy:
1340         case ByteTerm::TypePatternCasedCharacterNonGreedy:
1341             if (backtrackPatternCasedCharacter(currentTerm(), context))
1342                 MATCH_NEXT();
1343             BACKTRACK();
1344         case ByteTerm::TypeCharacterClass:
1345             if (backtrackCharacterClass(currentTerm(), context))
1346                 MATCH_NEXT();
1347             BACKTRACK();
1348         case ByteTerm::TypeBackReference:
1349             if (backtrackBackReference(currentTerm(), context))
1350                 MATCH_NEXT();
1351             BACKTRACK();
1352         case ByteTerm::TypeParenthesesSubpattern: {
1353             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1354
1355             if (result == JSRegExpMatch) {
1356                 MATCH_NEXT();
1357             } else if (result != JSRegExpNoMatch)
1358                 return result;
1359
1360             BACKTRACK();
1361         }
1362         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1363             if (backtrackParenthesesOnceBegin(currentTerm(), context))
1364                 MATCH_NEXT();
1365             BACKTRACK();
1366         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1367             if (backtrackParenthesesOnceEnd(currentTerm(), context))
1368                 MATCH_NEXT();
1369             BACKTRACK();
1370         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1371             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1372                 MATCH_NEXT();
1373             BACKTRACK();
1374         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1375             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1376                 MATCH_NEXT();
1377             BACKTRACK();
1378         case ByteTerm::TypeParentheticalAssertionBegin:
1379             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1380                 MATCH_NEXT();
1381             BACKTRACK();
1382         case ByteTerm::TypeParentheticalAssertionEnd:
1383             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1384                 MATCH_NEXT();
1385             BACKTRACK();
1386
1387         case ByteTerm::TypeCheckInput:
1388             input.uncheckInput(currentTerm().checkInputCount);
1389             BACKTRACK();
1390
1391         case ByteTerm::TypeUncheckInput:
1392             input.checkInput(currentTerm().checkInputCount);
1393             BACKTRACK();
1394
1395         case ByteTerm::TypeDotStarEnclosure:
1396             ASSERT_NOT_REACHED();
1397         }
1398
1399         ASSERT_NOT_REACHED();
1400         return JSRegExpErrorNoMatch;
1401     }
1402
1403     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1404     {
1405         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1406
1407         if (result == JSRegExpMatch) {
1408             while (context->matchBegin == context->matchEnd) {
1409                 result = matchDisjunction(disjunction, context, true);
1410                 if (result != JSRegExpMatch)
1411                     return result;
1412             }
1413             return JSRegExpMatch;
1414         }
1415
1416         return result;
1417     }
1418
1419     unsigned interpret()
1420     {
1421         if (!input.isAvailableInput(0))
1422             return offsetNoMatch;
1423
1424         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1425             output[i << 1] = offsetNoMatch;
1426
1427         allocatorPool = pattern->m_allocator->startAllocator();
1428         if (!allocatorPool)
1429             CRASH();
1430
1431         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1432
1433         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1434         if (result == JSRegExpMatch) {
1435             output[0] = context->matchBegin;
1436             output[1] = context->matchEnd;
1437         }
1438
1439         freeDisjunctionContext(context);
1440
1441         pattern->m_allocator->stopAllocator();
1442
1443         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1444         return output[0];
1445     }
1446
1447     Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1448         : pattern(pattern)
1449         , output(output)
1450         , input(input, start, length)
1451         , allocatorPool(0)
1452         , remainingMatchCount(matchLimit)
1453     {
1454     }
1455
1456 private:
1457     BytecodePattern* pattern;
1458     unsigned* output;
1459     InputStream input;
1460     BumpPointerPool* allocatorPool;
1461     unsigned remainingMatchCount;
1462 };
1463
1464
1465
1466 class ByteCompiler {
1467     struct ParenthesesStackEntry {
1468         unsigned beginTerm;
1469         unsigned savedAlternativeIndex;
1470         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1471             : beginTerm(beginTerm)
1472             , savedAlternativeIndex(savedAlternativeIndex)
1473         {
1474         }
1475     };
1476
1477 public:
1478     ByteCompiler(YarrPattern& pattern)
1479         : m_pattern(pattern)
1480     {
1481         m_currentAlternativeIndex = 0;
1482     }
1483
1484     PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1485     {
1486         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1487         emitDisjunction(m_pattern.m_body);
1488         regexEnd();
1489
1490         return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
1491     }
1492
1493     void checkInput(unsigned count)
1494     {
1495         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1496     }
1497
1498     void uncheckInput(unsigned count)
1499     {
1500         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1501     }
1502     
1503     void assertionBOL(unsigned inputPosition)
1504     {
1505         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1506     }
1507
1508     void assertionEOL(unsigned inputPosition)
1509     {
1510         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1511     }
1512
1513     void assertionWordBoundary(bool invert, unsigned inputPosition)
1514     {
1515         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1516     }
1517
1518     void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1519     {
1520         if (m_pattern.m_ignoreCase) {
1521             UChar lo = Unicode::toLower(ch);
1522             UChar hi = Unicode::toUpper(ch);
1523
1524             if (lo != hi) {
1525                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1526                 return;
1527             }
1528         }
1529
1530         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1531     }
1532
1533     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1534     {
1535         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1536
1537         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1538         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1539         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1540     }
1541
1542     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1543     {
1544         ASSERT(subpatternId);
1545
1546         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1547
1548         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1549         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1550         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1551     }
1552
1553     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1554     {
1555         int beginTerm = m_bodyDisjunction->terms.size();
1556
1557         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1558         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1559         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1560         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1561
1562         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1563         m_currentAlternativeIndex = beginTerm + 1;
1564     }
1565
1566     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1567     {
1568         int beginTerm = m_bodyDisjunction->terms.size();
1569
1570         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1571         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1572         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1573         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1574
1575         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1576         m_currentAlternativeIndex = beginTerm + 1;
1577     }
1578
1579     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1580     {
1581         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1582         // then fix this up at the end! - simplifying this should make it much clearer.
1583         // https://bugs.webkit.org/show_bug.cgi?id=50136
1584
1585         int beginTerm = m_bodyDisjunction->terms.size();
1586
1587         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1588         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1589         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1590         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1591
1592         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1593         m_currentAlternativeIndex = beginTerm + 1;
1594     }
1595
1596     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1597     {
1598         int beginTerm = m_bodyDisjunction->terms.size();
1599
1600         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1601         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1602         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1603         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1604
1605         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1606         m_currentAlternativeIndex = beginTerm + 1;
1607     }
1608
1609     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1610     {
1611         unsigned beginTerm = popParenthesesStack();
1612         closeAlternative(beginTerm + 1);
1613         unsigned endTerm = m_bodyDisjunction->terms.size();
1614
1615         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1616
1617         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1618         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1619
1620         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1621         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1622         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1623         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1624
1625         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1626         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1627         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1628         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1629     }
1630
1631     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1632     {
1633         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1634     }
1635
1636     unsigned popParenthesesStack()
1637     {
1638         ASSERT(m_parenthesesStack.size());
1639         int stackEnd = m_parenthesesStack.size() - 1;
1640         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1641         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1642         m_parenthesesStack.shrink(stackEnd);
1643
1644         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1645         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1646
1647         return beginTerm;
1648     }
1649
1650 #ifndef NDEBUG
1651     void dumpDisjunction(ByteDisjunction* disjunction)
1652     {
1653         dataLog("ByteDisjunction(%p):\n\t", disjunction);
1654         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1655             dataLog("{ %d } ", disjunction->terms[i].type);
1656         dataLog("\n");
1657     }
1658 #endif
1659
1660     void closeAlternative(int beginTerm)
1661     {
1662         int origBeginTerm = beginTerm;
1663         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1664         int endIndex = m_bodyDisjunction->terms.size();
1665
1666         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1667
1668         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1669             m_bodyDisjunction->terms.remove(beginTerm);
1670         else {
1671             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1672                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1673                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1674                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1675                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1676             }
1677
1678             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1679
1680             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1681             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1682         }
1683     }
1684
1685     void closeBodyAlternative()
1686     {
1687         int beginTerm = 0;
1688         int origBeginTerm = 0;
1689         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1690         int endIndex = m_bodyDisjunction->terms.size();
1691
1692         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1693
1694         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1695             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1696             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1697             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1698             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1699         }
1700
1701         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1702
1703         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1704         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1705     }
1706
1707     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1708     {
1709         unsigned beginTerm = popParenthesesStack();
1710         closeAlternative(beginTerm + 1);
1711         unsigned endTerm = m_bodyDisjunction->terms.size();
1712
1713         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1714
1715         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1716
1717         bool capture = parenthesesBegin.capture();
1718         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1719
1720         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1721         ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
1722
1723         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1724         for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
1725             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1726         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1727
1728         m_bodyDisjunction->terms.shrink(beginTerm);
1729
1730         m_allParenthesesInfo.append(parenthesesDisjunction);
1731         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
1732
1733         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1734         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1735         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1736     }
1737
1738     void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1739     {
1740         unsigned beginTerm = popParenthesesStack();
1741         closeAlternative(beginTerm + 1);
1742         unsigned endTerm = m_bodyDisjunction->terms.size();
1743
1744         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1745
1746         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1747         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1748
1749         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1750         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1751         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1752         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1753
1754         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1755         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1756         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1757         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1758     }
1759
1760     void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1761     {
1762         unsigned beginTerm = popParenthesesStack();
1763         closeAlternative(beginTerm + 1);
1764         unsigned endTerm = m_bodyDisjunction->terms.size();
1765
1766         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1767
1768         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1769         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1770
1771         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1772         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1773         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1774         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1775
1776         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1777         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1778         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1779         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1780     }
1781
1782     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1783     {
1784         m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1785         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1786         m_bodyDisjunction->terms[0].frameLocation = 0;
1787         m_currentAlternativeIndex = 0;
1788     }
1789
1790     void regexEnd()
1791     {
1792         closeBodyAlternative();
1793     }
1794
1795     void alternativeBodyDisjunction(bool onceThrough)
1796     {
1797         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1798         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1799         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1800
1801         m_currentAlternativeIndex = newAlternativeIndex;
1802     }
1803
1804     void alternativeDisjunction()
1805     {
1806         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1807         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1808         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1809
1810         m_currentAlternativeIndex = newAlternativeIndex;
1811     }
1812
1813     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1814     {
1815         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1816             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1817
1818             PatternAlternative* alternative = disjunction->m_alternatives[alt];
1819
1820             if (alt) {
1821                 if (disjunction == m_pattern.m_body)
1822                     alternativeBodyDisjunction(alternative->onceThrough());
1823                 else
1824                     alternativeDisjunction();
1825             }
1826
1827             unsigned minimumSize = alternative->m_minimumSize;
1828             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1829             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1830
1831             if (countToCheck) {
1832                 checkInput(countToCheck);
1833                 currentCountAlreadyChecked += countToCheck;
1834             }
1835
1836             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1837                 PatternTerm& term = alternative->m_terms[i];
1838
1839                 switch (term.type) {
1840                 case PatternTerm::TypeAssertionBOL:
1841                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);
1842                     break;
1843
1844                 case PatternTerm::TypeAssertionEOL:
1845                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);
1846                     break;
1847
1848                 case PatternTerm::TypeAssertionWordBoundary:
1849                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
1850                     break;
1851
1852                 case PatternTerm::TypePatternCharacter:
1853                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1854                     break;
1855
1856                 case PatternTerm::TypeCharacterClass:
1857                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1858                     break;
1859
1860                 case PatternTerm::TypeBackReference:
1861                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1862                         break;
1863
1864                 case PatternTerm::TypeForwardReference:
1865                     break;
1866
1867                 case PatternTerm::TypeParenthesesSubpattern: {
1868                     unsigned disjunctionAlreadyCheckedCount = 0;
1869                     if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1870                         unsigned alternativeFrameLocation = term.frameLocation;
1871                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1872                         if (term.quantityType == QuantifierFixedCount)
1873                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1874                         else
1875                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1876                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1877                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
1878                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1879                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1880                     } else if (term.parentheses.isTerminal) {
1881                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1882                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1883                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1884                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1885                     } else {
1886                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1887                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
1888                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1889                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1890                     }
1891                     break;
1892                 }
1893
1894                 case PatternTerm::TypeParentheticalAssertion: {
1895                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1896
1897                     ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1898                     unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1899                     unsigned uncheckAmount = 0;
1900                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1901                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1902                         uncheckInput(uncheckAmount);
1903                         currentCountAlreadyChecked -= uncheckAmount;
1904                     }
1905
1906                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1907                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1908                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1909                     if (uncheckAmount) {
1910                         checkInput(uncheckAmount);
1911                         currentCountAlreadyChecked += uncheckAmount;
1912                     }
1913                     break;
1914                 }
1915
1916                 case PatternTerm::TypeDotStarEnclosure:
1917                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
1918                     break;
1919                 }
1920             }
1921         }
1922     }
1923
1924 private:
1925     YarrPattern& m_pattern;
1926     OwnPtr<ByteDisjunction> m_bodyDisjunction;
1927     unsigned m_currentAlternativeIndex;
1928     Vector<ParenthesesStackEntry> m_parenthesesStack;
1929     Vector<ByteDisjunction*> m_allParenthesesInfo;
1930 };
1931
1932 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1933 {
1934     return ByteCompiler(pattern).compile(allocator);
1935 }
1936
1937 unsigned interpret(BytecodePattern* bytecode, const UString& input, unsigned start, unsigned* output)
1938 {
1939     if (input.is8Bit())
1940         return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
1941     return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
1942 }
1943
1944 unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
1945 {
1946     return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
1947 }
1948
1949 unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
1950 {
1951     return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
1952 }
1953
1954 // These should be the same for both UChar & LChar.
1955 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1956 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1957 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1958 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1959 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1960 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1961 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1962
1963
1964 } }