Imported Upstream version 1.0.0
[platform/upstream/js.git] / js / src / yarr / pcre / pcre_exec.cpp
1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
6
7                  Originally written by Philip Hazel
8            Copyright (c) 1997-2006 University of Cambridge
9     Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
10     Copyright (C) 2007 Eric Seidel <eric@webkit.org>
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains jsRegExpExecute(), the externally visible function
42 that does pattern matching using an NFA algorithm, following the rules from
43 the JavaScript specification. There are also some supporting functions. */
44
45 #include "pcre_internal.h"
46
47 #include <limits.h>
48 #include "yarr/jswtfbridge.h"
49 #include "yarr/wtf/ASCIICType.h"
50 #include "jsarena.h"
51 #include "jscntxt.h"
52
53 using namespace WTF;
54
55 #if !WTF_COMPILER_MSVC && !WTF_COMPILER_SUNPRO
56 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
57 #endif
58
59 /* Note: Webkit sources have USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP disabled. */
60 /* Note: There are hardcoded constants all over the place, but in the port of
61  Yarr to TraceMonkey two bytes are added to the OP_BRA* opcodes, so the
62  instruction stream now looks like this at the start of a bracket group:
63
64     OP_BRA* [link:LINK_SIZE] [minNestedBracket,maxNestedBracket:2]
65
66  Both capturing and non-capturing brackets encode this information. */
67
68 /* Avoid warnings on Windows. */
69 #undef min
70 #undef max
71
72 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
73 typedef int ReturnLocation;
74 #else
75 typedef void* ReturnLocation;
76 #endif
77
78 /* Node on a stack of brackets. This is used to detect and reject
79  matches of the empty string per ECMAScript repeat match rules. This
80  also prevents infinite loops on quantified empty matches. One node
81  represents the start state at the start of this bracket group. */
82 struct BracketChainNode {
83     BracketChainNode* previousBracket;
84     const UChar* bracketStart;
85     /* True if the minimum number of matches was already satisfied
86      when we started matching this group. */
87     bool minSatisfied;
88 };
89
90 struct MatchFrame {
91     ReturnLocation returnLocation;
92     struct MatchFrame* previousFrame;
93     int *savedOffsets;
94     /* The frame allocates saved offsets into the regular expression arena pool so
95      that they can be restored during backtracking. */
96     size_t savedOffsetsSize;
97     JSArenaPool *regExpPool;
98
99     MatchFrame() : savedOffsetsSize(0), regExpPool(0) {}
100     void init(JSArenaPool *regExpPool) { this->regExpPool = regExpPool; }
101     
102     /* Function arguments that may change */
103     struct {
104         const UChar* subjectPtr;
105         const unsigned char* instructionPtr;
106         int offsetTop;
107         BracketChainNode* bracketChain;
108     } args;
109     
110     
111     /* PCRE uses "fake" recursion built off of gotos, thus
112      stack-based local variables are not safe to use.  Instead we have to
113      store local variables on the current MatchFrame. */
114     struct {
115         const unsigned char* data;
116         const unsigned char* startOfRepeatingBracket;
117         const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
118         const unsigned char* instructionPtrAtStartOfOnce;
119         
120         int repeatOthercase;
121         int savedSubjectOffset;
122         
123         int ctype;
124         int fc;
125         int fi;
126         int length;
127         int max;
128         int number;
129         int offset;
130         int skipBytes;
131         int minBracket;
132         int limitBracket;
133         int bracketsBefore;
134         bool minSatisfied;
135         
136         BracketChainNode bracketChainNode;
137     } locals;
138
139     void saveOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) {
140         JS_ASSERT(regExpPool);
141         JS_ASSERT(minBracket >= 0);
142         JS_ASSERT(limitBracket >= minBracket);
143         JS_ASSERT(offsetEnd >= 0);
144         if (minBracket == limitBracket)
145             return;
146         const size_t newSavedOffsetCount = 3 * (limitBracket - minBracket);
147         /* Increase saved offset space if necessary. */
148         {
149             size_t targetSize = sizeof(*savedOffsets) * newSavedOffsetCount;
150             if (savedOffsetsSize < targetSize) {
151                 JS_ARENA_ALLOCATE_CAST(savedOffsets, int *, regExpPool, targetSize);
152                 JS_ASSERT(savedOffsets); /* FIXME: error code, bug 574459. */
153                 savedOffsetsSize = targetSize;
154             }
155         }
156         for (unsigned i = 0; i < unsigned(limitBracket - minBracket); ++i) {
157             int bracketIter = minBracket + i;
158             JS_ASSERT(2 * bracketIter + 1 <= offsetEnd);
159             int start = offsets[2 * bracketIter];
160             int end = offsets[2 * bracketIter + 1];
161             JS_ASSERT(bracketIter <= offsetEnd);
162             int offset = offsets[offsetEnd - bracketIter];
163             DPRINTF(("saving bracket %d; start: %d; end: %d; offset: %d\n", bracketIter, start, end, offset));
164             JS_ASSERT(start <= end);
165             JS_ASSERT(i * 3 + 2 < newSavedOffsetCount);
166             savedOffsets[i * 3 + 0] = start;
167             savedOffsets[i * 3 + 1] = end;
168             savedOffsets[i * 3 + 2] = offset;
169         }
170     }
171
172     void clobberOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) {
173         for (int i = 0; i < limitBracket - minBracket; ++i) {
174             int bracketIter = minBracket + i;
175             JS_ASSERT(2 * bracketIter + 1 < offsetEnd);
176             offsets[2 * bracketIter + 0] = -1;
177             offsets[2 * bracketIter + 1] = -1;
178         }
179     }
180
181     void restoreOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) {
182         JS_ASSERT(regExpPool);
183         JS_ASSERT_IF(limitBracket > minBracket, savedOffsets);
184         for (int i = 0; i < limitBracket - minBracket; ++i) {
185             int bracketIter = minBracket + i;
186             int start = savedOffsets[i * 3 + 0];
187             int end = savedOffsets[i * 3 + 1];
188             int offset = savedOffsets[i * 3 + 2];
189             DPRINTF(("restoring bracket %d; start: %d; end: %d; offset: %d\n", bracketIter, start, end, offset));
190             JS_ASSERT(start <= end);
191             offsets[2 * bracketIter + 0] = start;
192             offsets[2 * bracketIter + 1] = end;
193             offsets[offsetEnd - bracketIter] = offset;
194         }
195     }
196
197     /* Extract the bracket data after the current opcode/link at |instructionPtr| into the locals. */
198     void extractBrackets(const unsigned char *instructionPtr) {
199         uint16 bracketMess = get2ByteValue(instructionPtr + 1 + LINK_SIZE);
200         locals.minBracket = (bracketMess >> 8) & 0xff;
201         locals.limitBracket = (bracketMess & 0xff);
202         JS_ASSERT(locals.minBracket <= locals.limitBracket);
203     }
204
205     /* At the start of a bracketed group, add the current subject pointer to the
206      stack of such pointers, to be re-instated at the end of the group when we hit
207      the closing ket. When match() is called in other circumstances, we don't add to
208      this stack. */
209     void startNewGroup(bool minSatisfied) {
210         locals.bracketChainNode.previousBracket = args.bracketChain;
211         locals.bracketChainNode.bracketStart = args.subjectPtr;
212         locals.bracketChainNode.minSatisfied = minSatisfied;
213         args.bracketChain = &locals.bracketChainNode;
214     }
215 };
216
217 /* Structure for passing "static" information around between the functions
218 doing traditional NFA matching, so that they are thread-safe. */
219
220 struct MatchData {
221     int             *offsetVector;  /* Offset vector */
222     int             offsetEnd;      /* One past the end */
223     int             offsetMax;      /* The maximum usable for return data */
224     bool            offsetOverflow; /* Set if too many extractions */
225     const UChar     *startSubject;  /* Start of the subject string */
226     const UChar     *endSubject;    /* End of the subject string */
227     const UChar     *endMatchPtr;   /* Subject position at end match */
228     int             endOffsetTop;   /* Highwater mark at end of match */
229     bool            multiline;
230     bool            ignoreCase;
231
232     void setOffsetPair(size_t pairNum, int start, int end) {
233         JS_ASSERT(int(2 * pairNum + 1) < offsetEnd && int(pairNum) < offsetEnd);
234         JS_ASSERT(start <= end);
235         JS_ASSERT_IF(start < 0, start == end && start == -1);
236         DPRINTF(("setting offset pair at %u (%d, %d)\n", pairNum, start, end));
237         offsetVector[2 * pairNum + 0] = start;
238         offsetVector[2 * pairNum + 1] = end;
239     }
240 };
241
242 /* The maximum remaining length of subject we are prepared to search for a
243 reqByte match. */
244
245 #define REQ_BYTE_MAX 1000
246
247 /* The below limit restricts the number of "recursive" match calls in order to
248 avoid spending exponential time on complex regular expressions. */
249
250 static const unsigned matchLimit = 1000000;
251
252 /*************************************************
253 *          Match a back-reference                *
254 *************************************************/
255
256 /* If a back reference hasn't been set, the length that is passed is greater
257 than the number of characters left in the string, so the match fails.
258
259 Arguments:
260   offset      index into the offset vector
261   subjectPtr        points into the subject
262   length      length to be matched
263   md          points to match data block
264
265 Returns:      true if matched
266 */
267
268 static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
269 {
270     const UChar* p = md.startSubject + md.offsetVector[offset];
271     
272     /* Always fail if not enough characters left */
273     
274     if (length > md.endSubject - subjectPtr)
275         return false;
276     
277     /* Separate the caselesss case for speed */
278     
279     if (md.ignoreCase) {
280         while (length-- > 0) {
281             UChar c = *p++;
282             int othercase = jsc_pcre_ucp_othercase(c);
283             UChar d = *subjectPtr++;
284             if (c != d && othercase != d)
285                 return false;
286         }
287     }
288     else {
289         while (length-- > 0)
290             if (*p++ != *subjectPtr++)
291                 return false;
292     }
293     
294     return true;
295 }
296
297 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
298
299 /* Use numbered labels and switch statement at the bottom of the match function. */
300
301 #define RMATCH_WHERE(num) num
302 #define RRETURN_LABEL RRETURN_SWITCH
303
304 #else
305
306 /* Use GCC's computed goto extension. */
307
308 /* For one test case this is more than 40% faster than the switch statement.
309 We could avoid the use of the num argument entirely by using local labels,
310 but using it for the GCC case as well as the non-GCC case allows us to share
311 a bit more code and notice if we use conflicting numbers.*/
312
313 #define RMATCH_WHERE(num) JS_EXTENSION(&&RRETURN_##num)
314 #define RRETURN_LABEL *stack.currentFrame->returnLocation
315
316 #endif
317
318 #define RECURSIVE_MATCH_COMMON(num) \
319     goto RECURSE;\
320     RRETURN_##num: \
321     stack.popCurrentFrame();
322
323 #define RECURSIVE_MATCH(num, ra, rb) \
324     do { \
325         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
326         RECURSIVE_MATCH_COMMON(num) \
327     } while (0)
328
329 #define RECURSIVE_MATCH_NEW_GROUP(num, ra, rb, gm) \
330     do { \
331         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
332         stack.currentFrame->startNewGroup(gm); \
333         RECURSIVE_MATCH_COMMON(num) \
334     } while (0)
335
336 #define RRETURN do { JS_EXTENSION_(goto RRETURN_LABEL); } while (0)
337
338 #define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
339
340 /*************************************************
341 *         Match from current position            *
342 *************************************************/
343
344 /* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
345 in the subject string, while substringStart holds the value of subjectPtr at the start of the
346 last bracketed group - used for breaking infinite loops matching zero-length
347 strings. This function is called recursively in many circumstances. Whenever it
348 returns a negative (error) response, the outer match() call must also return the
349 same response.
350
351 Arguments:
352    subjectPtr        pointer in subject
353    instructionPtr       position in code
354    offsetTop  current top pointer
355    md          pointer to "static" info for the match
356
357 Returns:       1 if matched          )  these values are >= 0
358                0 if failed to match  )
359                a negative error value if aborted by an error condition
360                  (e.g. stopped by repeated call or recursion limit)
361 */
362
363 static const unsigned numFramesOnStack = 16;
364
365 struct MatchStack {
366     JSArenaPool *regExpPool;
367     void *regExpPoolMark;
368
369     MatchStack(JSArenaPool *regExpPool)
370         : regExpPool(regExpPool)
371         , regExpPoolMark(JS_ARENA_MARK(regExpPool))
372         , framesEnd(frames + numFramesOnStack)
373         , currentFrame(frames)
374         , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
375     {
376         JS_ASSERT((sizeof(frames) / sizeof(frames[0])) == numFramesOnStack);
377         JS_ASSERT(regExpPool);
378         for (size_t i = 0; i < numFramesOnStack; ++i)
379             frames[i].init(regExpPool);
380     }
381
382     ~MatchStack() { JS_ARENA_RELEASE(regExpPool, regExpPoolMark); }
383     
384     MatchFrame frames[numFramesOnStack];
385     MatchFrame* framesEnd;
386     MatchFrame* currentFrame;
387     unsigned size;
388     
389     bool canUseStackBufferForNextFrame() {
390         return size < numFramesOnStack;
391     }
392     
393     MatchFrame* allocateNextFrame() {
394         if (canUseStackBufferForNextFrame())
395             return currentFrame + 1;
396         // FIXME: bug 574459 -- no NULL check
397         MatchFrame *frame = js_new<MatchFrame>();
398         frame->init(regExpPool);
399         return frame;
400     }
401     
402     void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation) {
403         MatchFrame* newframe = allocateNextFrame();
404         newframe->previousFrame = currentFrame;
405
406         newframe->args.subjectPtr = currentFrame->args.subjectPtr;
407         newframe->args.offsetTop = currentFrame->args.offsetTop;
408         newframe->args.instructionPtr = instructionPtr;
409         newframe->args.bracketChain = bracketChain;
410         newframe->returnLocation = returnLocation;
411         size++;
412
413         currentFrame = newframe;
414     }
415     
416     void popCurrentFrame() {
417         MatchFrame* oldFrame = currentFrame;
418         currentFrame = currentFrame->previousFrame;
419         if (size > numFramesOnStack)
420             js_delete(oldFrame);
421         size--;
422     }
423
424     void popAllFrames() {
425         while (size)
426             popCurrentFrame();
427     }
428 };
429
430 static int matchError(int errorCode, MatchStack& stack)
431 {
432     stack.popAllFrames();
433     return errorCode;
434 }
435
436 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
437  if there are extra bytes. This is called when we know we are in UTF-8 mode. */
438
439 static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len)
440 {
441     c = *subjectPtr;
442     if ((c & 0xc0) == 0xc0) {
443         int gcaa = jsc_pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
444         int gcss = 6 * gcaa;
445         c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss;
446         for (int gcii = 1; gcii <= gcaa; gcii++) {
447             gcss -= 6;
448             c |= (subjectPtr[gcii] & 0x3f) << gcss;
449         }
450         len += gcaa;
451     }
452 }
453
454 static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
455 {
456     // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
457     static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
458     static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
459
460     JS_ASSERT(instructionOffset >= 0);
461     JS_ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
462
463     minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
464     minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
465     maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
466 }
467
468 /* Helper class for passing a flag value from one op to the next that runs.
469  This allows us to set the flag in certain ops. When the flag is read, it
470  will be true only if the previous op set the flag, otherwise it is false. */
471 class LinearFlag {
472 public:
473     LinearFlag() : flag(false) {}
474     
475     bool readAndClear() {
476         bool rv = flag;
477         flag = false;
478         return rv;
479     }
480
481     void set() {
482         flag = true;
483     }
484
485 private:
486     bool flag;
487 };
488
489 static int
490 match(JSArenaPool *regExpPool, const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
491 {
492     bool isMatch = false;
493     int min;
494     bool minimize = false; /* Initialization not really needed, but some compilers think so. */
495     unsigned remainingMatchCount = matchLimit;
496     int othercase; /* Declare here to avoid errors during jumps */
497     bool minSatisfied;
498     
499     MatchStack stack(regExpPool);
500     LinearFlag minSatNextBracket;
501
502     /* The opcode jump table. */
503 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
504 #define EMIT_JUMP_TABLE_ENTRY(opcode) JS_EXTENSION(&&LABEL_OP_##opcode)
505     static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
506 #undef EMIT_JUMP_TABLE_ENTRY
507 #endif
508     
509     /* One-time setup of the opcode jump table. */
510 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
511     for (int i = 255; !opcodeJumpTable[i]; i--)
512         opcodeJumpTable[i] = &&CAPTURING_BRACKET;
513 #endif
514     
515 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
516     // Shark shows this as a hot line
517     // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
518     stack.currentFrame->returnLocation = JS_EXTENSION(&&RETURN);
519 #else
520     stack.currentFrame->returnLocation = 0;
521 #endif
522     stack.currentFrame->args.subjectPtr = subjectPtr;
523     stack.currentFrame->args.instructionPtr = instructionPtr;
524     stack.currentFrame->args.offsetTop = offsetTop;
525     stack.currentFrame->args.bracketChain = 0;
526     stack.currentFrame->startNewGroup(false);
527     
528     /* This is where control jumps back to to effect "recursion" */
529     
530 RECURSE:
531     if (!--remainingMatchCount)
532         return matchError(JSRegExpErrorHitLimit, stack);
533
534     /* Now start processing the operations. */
535     
536 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
537     while (true)
538 #endif
539     {
540         
541 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
542 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
543 #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
544 #else
545 #define BEGIN_OPCODE(opcode) case OP_##opcode
546 #define NEXT_OPCODE continue
547 #endif
548 #define LOCALS(__ident) (stack.currentFrame->locals.__ident)
549         
550 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
551         NEXT_OPCODE;
552 #else
553         switch (*stack.currentFrame->args.instructionPtr)
554 #endif
555         {
556             /* Non-capturing bracket: optimized */
557                 
558             BEGIN_OPCODE(BRA):
559             NON_CAPTURING_BRACKET:
560                 DPRINTF(("start non-capturing bracket\n"));
561                 stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr);
562                 /* If we see no ALT, we have to skip three bytes of bracket data (link plus nested
563                  bracket data. */
564                 stack.currentFrame->locals.skipBytes = 3;
565                 /* We must compute this value at the top, before we move the instruction pointer. */
566                 stack.currentFrame->locals.minSatisfied = minSatNextBracket.readAndClear();
567                 do {
568                     /* We need to extract this into a variable so we can correctly pass it by value
569                      through RECURSIVE_MATCH_NEW_GROUP, which modifies currentFrame. */
570                     minSatisfied = stack.currentFrame->locals.minSatisfied;
571                     RECURSIVE_MATCH_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, stack.currentFrame->args.bracketChain, minSatisfied);
572                     if (isMatch) {
573                         DPRINTF(("non-capturing bracket succeeded\n"));
574                         RRETURN;
575                     }
576                     stack.currentFrame->locals.skipBytes = 1;
577                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
578                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
579                 DPRINTF(("non-capturing bracket failed\n"));
580                 for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i)
581                     md.setOffsetPair(i, -1, -1);
582                 RRETURN;
583                 
584             /* Skip over large extraction number data if encountered. */
585                 
586             BEGIN_OPCODE(BRANUMBER):
587                 stack.currentFrame->args.instructionPtr += 3;
588                 NEXT_OPCODE;
589                 
590             /* End of the pattern. */
591                 
592             BEGIN_OPCODE(END):
593                 md.endMatchPtr = stack.currentFrame->args.subjectPtr;          /* Record where we ended */
594                 md.endOffsetTop = stack.currentFrame->args.offsetTop;   /* and how many extracts were taken */
595                 isMatch = true;
596                 RRETURN;
597                 
598             /* Assertion brackets. Check the alternative branches in turn - the
599              matching won't pass the KET for an assertion. If any one branch matches,
600              the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
601              start of each branch to move the current point backwards, so the code at
602              this level is identical to the lookahead case. */
603                 
604             BEGIN_OPCODE(ASSERT):
605                 {
606                     uint16 bracketMess = get2ByteValue(stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE);
607                     LOCALS(minBracket) = (bracketMess >> 8) & 0xff;
608                     LOCALS(limitBracket) = bracketMess & 0xff;
609                     JS_ASSERT(LOCALS(minBracket) <= LOCALS(limitBracket));
610                 }
611                 stack.currentFrame->locals.skipBytes = 3;
612                 do {
613                     RECURSIVE_MATCH_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, NULL, false);
614                     if (isMatch)
615                         break;
616                     stack.currentFrame->locals.skipBytes = 1;
617                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
618                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
619                 if (*stack.currentFrame->args.instructionPtr == OP_KET) {
620                     for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i)
621                         md.setOffsetPair(i, -1, -1);
622                     RRETURN_NO_MATCH;
623                 }
624                 
625                 /* Continue from after the assertion, updating the offsets high water
626                  mark, since extracts may have been taken during the assertion. */
627                 
628                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
629                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
630                 stack.currentFrame->args.offsetTop = md.endOffsetTop;
631                 NEXT_OPCODE;
632                 
633             /* Negative assertion: all branches must fail to match */
634                 
635             BEGIN_OPCODE(ASSERT_NOT):
636                 stack.currentFrame->locals.skipBytes = 3;
637                 {
638                     unsigned bracketMess = get2ByteValue(stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE);
639                     LOCALS(minBracket) = (bracketMess >> 8) & 0xff;
640                     LOCALS(limitBracket) = bracketMess & 0xff;
641                 }
642                 JS_ASSERT(LOCALS(minBracket) <= LOCALS(limitBracket));
643                 do {
644                     RECURSIVE_MATCH_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, NULL, false);
645                     if (isMatch)
646                         RRETURN_NO_MATCH;
647                     stack.currentFrame->locals.skipBytes = 1;
648                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
649                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
650                 
651                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.skipBytes + LINK_SIZE;
652                 NEXT_OPCODE;
653                 
654             /* An alternation is the end of a branch; scan along to find the end of the
655              bracketed group and go to there. */
656                 
657             BEGIN_OPCODE(ALT):
658                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
659                 NEXT_OPCODE;
660                 
661             /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
662              that it may occur zero times. It may repeat infinitely, or not at all -
663              i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
664              repeat limits are compiled as a number of copies, with the optional ones
665              preceded by BRAZERO or BRAMINZERO. */
666                 
667             BEGIN_OPCODE(BRAZERO): {
668                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
669                 stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr + 1);
670                 stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
671                 minSatNextBracket.set();
672                 RECURSIVE_MATCH_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain, true);
673                 if (isMatch)
674                     RRETURN;
675                 stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
676                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
677                 stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
678                 NEXT_OPCODE;
679             }
680                 
681             BEGIN_OPCODE(BRAMINZERO): {
682                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
683                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
684                 RECURSIVE_MATCH_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain, false);
685                 if (isMatch)
686                     RRETURN;
687                 stack.currentFrame->args.instructionPtr++;
688                 NEXT_OPCODE;
689             }
690                 
691             /* End of a group, repeated or non-repeating. If we are at the end of
692              an assertion "group", stop matching and return 1, but record the
693              current high water mark for use by positive assertions. Do this also
694              for the "once" (not-backup up) groups. */
695                 
696             BEGIN_OPCODE(KET):
697             BEGIN_OPCODE(KETRMIN):
698             BEGIN_OPCODE(KETRMAX):
699                 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1);
700                 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.bracketChain->bracketStart;
701                 stack.currentFrame->locals.minSatisfied = stack.currentFrame->args.bracketChain->minSatisfied;
702
703                 /* Back up the stack of bracket start pointers. */
704
705                 stack.currentFrame->args.bracketChain = stack.currentFrame->args.bracketChain->previousBracket;
706
707                 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) {
708                     md.endOffsetTop = stack.currentFrame->args.offsetTop;
709                     isMatch = true;
710                     RRETURN;
711                 }
712                 
713                 /* In all other cases except a conditional group we have to check the
714                  group number back at the start and if necessary complete handling an
715                  extraction by setting the offsets and bumping the high water mark. */
716                 
717                 stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
718                 
719                 /* For extended extraction brackets (large number), we have to fish out
720                  the number from a dummy opcode at the start. */
721                 
722                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
723                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 4 + LINK_SIZE);
724                 stack.currentFrame->locals.offset = 2 * stack.currentFrame->locals.number;
725                 
726                 DPRINTF(("end bracket %d\n", stack.currentFrame->locals.number));
727                 
728                 /* Test for a numbered group. This includes groups called as a result
729                  of recursion. Note that whole-pattern recursion is coded as a recurse
730                  into group 0, so it won't be picked up here. Instead, we catch it when
731                  the OP_END is reached. */
732                 
733                 if (stack.currentFrame->locals.number > 0) {
734                     if (stack.currentFrame->locals.offset >= md.offsetMax)
735                         md.offsetOverflow = true;
736                     else {
737                         int start = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
738                         int end = stack.currentFrame->args.subjectPtr - md.startSubject;
739                         if (start == end && stack.currentFrame->locals.minSatisfied) {
740                             DPRINTF(("empty string while group already matched; bailing"));
741                             RRETURN_NO_MATCH;
742                         }
743                         DPRINTF(("saving; start: %d; end: %d\n", start, end));
744                         JS_ASSERT(start <= end);
745                         md.setOffsetPair(stack.currentFrame->locals.number, start, end);
746                         if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset)
747                             stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
748                     }
749                 }
750                 
751                 /* For a non-repeating ket, just continue at this level. This also
752                  happens for a repeating ket if no characters were matched in the group.
753                  This is the forcible breaking of infinite loops as implemented in Perl
754                  5.005. If there is an options reset, it will get obeyed in the normal
755                  course of events. */
756                 
757                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
758                     DPRINTF(("non-repeating ket or empty match\n"));
759                     if (stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction && stack.currentFrame->locals.minSatisfied) {
760                         DPRINTF(("empty string while group already matched; bailing"));
761                         RRETURN_NO_MATCH;
762                     }
763                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
764                     NEXT_OPCODE;
765                 }
766                 
767                 /* The repeating kets try the rest of the pattern or restart from the
768                  preceding bracket, in the appropriate order. */
769                 
770                 stack.currentFrame->extractBrackets(LOCALS(instructionPtrAtStartOfOnce));
771                 JS_ASSERT_IF(LOCALS(number), LOCALS(minBracket) <= LOCALS(number) && LOCALS(number) < LOCALS(limitBracket));
772                 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
773                     stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
774                     RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
775                     if (isMatch)
776                         RRETURN;
777                     else
778                         stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
779                     DPRINTF(("recursively matching lazy group\n"));
780                     minSatNextBracket.set();
781                     RECURSIVE_MATCH_NEW_GROUP(17, LOCALS(instructionPtrAtStartOfOnce), stack.currentFrame->args.bracketChain, true);
782                 } else { /* OP_KETRMAX */
783                     stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
784                     stack.currentFrame->clobberOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
785                     DPRINTF(("recursively matching greedy group\n"));
786                     minSatNextBracket.set();
787                     RECURSIVE_MATCH_NEW_GROUP(18, LOCALS(instructionPtrAtStartOfOnce), stack.currentFrame->args.bracketChain, true);
788                     if (isMatch)
789                         RRETURN;
790                     else
791                         stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
792                     RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
793                 }
794                 RRETURN;
795                 
796             /* Start of subject. */
797
798             BEGIN_OPCODE(CIRC):
799                 if (stack.currentFrame->args.subjectPtr != md.startSubject)
800                     RRETURN_NO_MATCH;
801                 stack.currentFrame->args.instructionPtr++;
802                 NEXT_OPCODE;
803
804             /* After internal newline if multiline. */
805
806             BEGIN_OPCODE(BOL):
807                 if (stack.currentFrame->args.subjectPtr != md.startSubject && !isNewline(stack.currentFrame->args.subjectPtr[-1]))
808                     RRETURN_NO_MATCH;
809                 stack.currentFrame->args.instructionPtr++;
810                 NEXT_OPCODE;
811
812             /* End of subject. */
813
814             BEGIN_OPCODE(DOLL):
815                 if (stack.currentFrame->args.subjectPtr < md.endSubject)
816                     RRETURN_NO_MATCH;
817                 stack.currentFrame->args.instructionPtr++;
818                 NEXT_OPCODE;
819
820             /* Before internal newline if multiline. */
821
822             BEGIN_OPCODE(EOL):
823                 if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNewline(*stack.currentFrame->args.subjectPtr))
824                     RRETURN_NO_MATCH;
825                 stack.currentFrame->args.instructionPtr++;
826                 NEXT_OPCODE;
827                 
828             /* Word boundary assertions */
829                 
830             BEGIN_OPCODE(NOT_WORD_BOUNDARY):
831             BEGIN_OPCODE(WORD_BOUNDARY): {
832                 bool currentCharIsWordChar = false;
833                 bool previousCharIsWordChar = false;
834                 
835                 if (stack.currentFrame->args.subjectPtr > md.startSubject)
836                     previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
837                 if (stack.currentFrame->args.subjectPtr < md.endSubject)
838                     currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
839                 
840                 /* Now see if the situation is what we want */
841                 bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
842                 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
843                     RRETURN_NO_MATCH;
844                 NEXT_OPCODE;
845             }
846                 
847             /* Match a single character type; inline for speed */
848                 
849             BEGIN_OPCODE(NOT_NEWLINE):
850                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
851                     RRETURN_NO_MATCH;
852                 if (isNewline(*stack.currentFrame->args.subjectPtr++))
853                     RRETURN_NO_MATCH;
854                 stack.currentFrame->args.instructionPtr++;
855                 NEXT_OPCODE;
856
857             BEGIN_OPCODE(NOT_DIGIT):
858                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
859                     RRETURN_NO_MATCH;
860                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
861                     RRETURN_NO_MATCH;
862                 stack.currentFrame->args.instructionPtr++;
863                 NEXT_OPCODE;
864
865             BEGIN_OPCODE(DIGIT):
866                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
867                     RRETURN_NO_MATCH;
868                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
869                     RRETURN_NO_MATCH;
870                 stack.currentFrame->args.instructionPtr++;
871                 NEXT_OPCODE;
872
873             BEGIN_OPCODE(NOT_WHITESPACE):
874                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
875                     RRETURN_NO_MATCH;
876                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
877                     RRETURN_NO_MATCH;
878                 stack.currentFrame->args.instructionPtr++;
879                 NEXT_OPCODE;
880
881             BEGIN_OPCODE(WHITESPACE):
882                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
883                     RRETURN_NO_MATCH;
884                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
885                     RRETURN_NO_MATCH;
886                 stack.currentFrame->args.instructionPtr++;
887                 NEXT_OPCODE;
888                 
889             BEGIN_OPCODE(NOT_WORDCHAR):
890                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
891                     RRETURN_NO_MATCH;
892                 if (isWordChar(*stack.currentFrame->args.subjectPtr++))
893                     RRETURN_NO_MATCH;
894                 stack.currentFrame->args.instructionPtr++;
895                 NEXT_OPCODE;
896                 
897             BEGIN_OPCODE(WORDCHAR):
898                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
899                     RRETURN_NO_MATCH;
900                 if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
901                     RRETURN_NO_MATCH;
902                 stack.currentFrame->args.instructionPtr++;
903                 NEXT_OPCODE;
904                 
905             /* Match a back reference, possibly repeatedly. Look past the end of the
906              item to see if there is repeat information following. The code is similar
907              to that for character classes, but repeated for efficiency. Then obey
908              similar code to character type repeats - written out again for speed.
909              However, if the referenced string is the empty string, always treat
910              it as matched, any number of times (otherwise there could be infinite
911              loops). */
912                 
913             BEGIN_OPCODE(REF):
914                 stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1;               /* Doubled ref number */
915                 stack.currentFrame->args.instructionPtr += 3;                                 /* Advance past item */
916                 
917                 /* If the reference is unset, set the length to be longer than the amount
918                  of subject left; this ensures that every attempt at a match fails. We
919                  can't just fail here, because of the possibility of quantifiers with zero
920                  minima. */
921                 
922                 if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
923                     stack.currentFrame->locals.length = 0;
924                 else
925                     stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
926                 
927                 /* Set up for repetition, or handle the non-repeated case */
928                 
929                 switch (*stack.currentFrame->args.instructionPtr) {
930                     case OP_CRSTAR:
931                     case OP_CRMINSTAR:
932                     case OP_CRPLUS:
933                     case OP_CRMINPLUS:
934                     case OP_CRQUERY:
935                     case OP_CRMINQUERY:
936                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
937                         break;
938                         
939                     case OP_CRRANGE:
940                     case OP_CRMINRANGE:
941                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
942                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
943                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
944                         if (stack.currentFrame->locals.max == 0)
945                             stack.currentFrame->locals.max = INT_MAX;
946                         stack.currentFrame->args.instructionPtr += 5;
947                         break;
948                     
949                     default:               /* No repeat follows */
950                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
951                             RRETURN_NO_MATCH;
952                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
953                         NEXT_OPCODE;
954                 }
955                 
956                 /* If the length of the reference is zero, just continue with the
957                  main loop. */
958                 
959                 if (stack.currentFrame->locals.length == 0)
960                     NEXT_OPCODE;
961                 
962                 /* First, ensure the minimum number of matches are present. */
963                 
964                 for (int i = 1; i <= min; i++) {
965                     if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
966                         RRETURN_NO_MATCH;
967                     stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
968                 }
969                 
970                 /* If min = max, continue at the same level without recursion.
971                  They are not both allowed to be zero. */
972                 
973                 if (min == stack.currentFrame->locals.max)
974                     NEXT_OPCODE;
975                 
976                 /* If minimizing, keep trying and advancing the pointer */
977                 
978                 if (minimize) {
979                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
980                         RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
981                         if (isMatch)
982                             RRETURN;
983                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
984                             RRETURN;
985                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
986                     }
987                     /* Control never reaches here */
988                 }
989                 
990                 /* If maximizing, find the longest string and work backwards */
991                 
992                 else {
993                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
994                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
995                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
996                             break;
997                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
998                     }
999                     while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1000                         RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1001                         if (isMatch)
1002                             RRETURN;
1003                         stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
1004                     }
1005                     RRETURN_NO_MATCH;
1006                 }
1007                 /* Control never reaches here */
1008                 
1009             /* Match a bit-mapped character class, possibly repeatedly. This op code is
1010              used when all the characters in the class have values in the range 0-255,
1011              and either the matching is caseful, or the characters are in the range
1012              0-127 when UTF-8 processing is enabled. The only difference between
1013              OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1014              encountered.
1015              
1016              First, look past the end of the item to see if there is repeat information
1017              following. Then obey similar code to character type repeats - written out
1018              again for speed. */
1019                 
1020             BEGIN_OPCODE(NCLASS):
1021             BEGIN_OPCODE(CLASS):
1022                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1;                /* Save for matching */
1023                 stack.currentFrame->args.instructionPtr += 33;                     /* Advance past the item */
1024                 
1025                 switch (*stack.currentFrame->args.instructionPtr) {
1026                     case OP_CRSTAR:
1027                     case OP_CRMINSTAR:
1028                     case OP_CRPLUS:
1029                     case OP_CRMINPLUS:
1030                     case OP_CRQUERY:
1031                     case OP_CRMINQUERY:
1032                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
1033                         break;
1034                         
1035                     case OP_CRRANGE:
1036                     case OP_CRMINRANGE:
1037                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
1038                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1039                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
1040                         if (stack.currentFrame->locals.max == 0)
1041                             stack.currentFrame->locals.max = INT_MAX;
1042                         stack.currentFrame->args.instructionPtr += 5;
1043                         break;
1044                         
1045                     default:               /* No repeat follows */
1046                         min = stack.currentFrame->locals.max = 1;
1047                         break;
1048                 }
1049                 
1050                 /* First, ensure the minimum number of matches are present. */
1051                 
1052                 for (int i = 1; i <= min; i++) {
1053                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1054                         RRETURN_NO_MATCH;
1055                     int c = *stack.currentFrame->args.subjectPtr++;
1056                     if (c > 255) {
1057                         if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1058                             RRETURN_NO_MATCH;
1059                     } else {
1060                         if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
1061                             RRETURN_NO_MATCH;
1062                     }
1063                 }
1064                 
1065                 /* If max == min we can continue with the main loop without the
1066                  need to recurse. */
1067                 
1068                 if (min == stack.currentFrame->locals.max)
1069                     NEXT_OPCODE;      
1070                 
1071                 /* If minimizing, keep testing the rest of the expression and advancing
1072                  the pointer while it matches the class. */
1073                 if (minimize) {
1074                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1075                         RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1076                         if (isMatch)
1077                             RRETURN;
1078                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1079                             RRETURN;
1080                         int c = *stack.currentFrame->args.subjectPtr++;
1081                         if (c > 255) {
1082                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1083                                 RRETURN;
1084                         } else {
1085                             if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
1086                                 RRETURN;
1087                         }
1088                     }
1089                     /* Control never reaches here */
1090                 }
1091                 /* If maximizing, find the longest possible run, then work backwards. */
1092                 else {
1093                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1094                     
1095                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
1096                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1097                             break;
1098                         int c = *stack.currentFrame->args.subjectPtr;
1099                         if (c > 255) {
1100                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1101                                 break;
1102                         } else {
1103                             if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
1104                                 break;
1105                         }
1106                         ++stack.currentFrame->args.subjectPtr;
1107                     }
1108                     for (;;) {
1109                         RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1110                         if (isMatch)
1111                             RRETURN;
1112                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1113                             break;        /* Stop if tried at original pos */
1114                     }
1115                     
1116                     RRETURN;
1117                 }
1118                 /* Control never reaches here */
1119                 
1120             /* Match an extended character class. */
1121                 
1122             BEGIN_OPCODE(XCLASS):
1123                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE;                /* Save for matching */
1124                 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);                      /* Advance past the item */
1125                 
1126                 switch (*stack.currentFrame->args.instructionPtr) {
1127                     case OP_CRSTAR:
1128                     case OP_CRMINSTAR:
1129                     case OP_CRPLUS:
1130                     case OP_CRMINPLUS:
1131                     case OP_CRQUERY:
1132                     case OP_CRMINQUERY:
1133                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
1134                         break;
1135                         
1136                     case OP_CRRANGE:
1137                     case OP_CRMINRANGE:
1138                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
1139                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1140                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
1141                         if (stack.currentFrame->locals.max == 0)
1142                             stack.currentFrame->locals.max = INT_MAX;
1143                         stack.currentFrame->args.instructionPtr += 5;
1144                         break;
1145                         
1146                     default:               /* No repeat follows */
1147                         min = stack.currentFrame->locals.max = 1;
1148                 }
1149                 
1150                 /* First, ensure the minimum number of matches are present. */
1151                 
1152                 for (int i = 1; i <= min; i++) {
1153                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1154                         RRETURN_NO_MATCH;
1155                     int c = *stack.currentFrame->args.subjectPtr++;
1156                     if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
1157                         RRETURN_NO_MATCH;
1158                 }
1159                 
1160                 /* If max == min we can continue with the main loop without the
1161                  need to recurse. */
1162                 
1163                 if (min == stack.currentFrame->locals.max)
1164                     NEXT_OPCODE;
1165                 
1166                 /* If minimizing, keep testing the rest of the expression and advancing
1167                  the pointer while it matches the class. */
1168                 
1169                 if (minimize) {
1170                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1171                         RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1172                         if (isMatch)
1173                             RRETURN;
1174                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1175                             RRETURN;
1176                         int c = *stack.currentFrame->args.subjectPtr++;
1177                         if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
1178                             RRETURN;
1179                     }
1180                     /* Control never reaches here */
1181                 }
1182                 
1183                 /* If maximizing, find the longest possible run, then work backwards. */
1184                 
1185                 else {
1186                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1187                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
1188                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1189                             break;
1190                         int c = *stack.currentFrame->args.subjectPtr;
1191                         if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
1192                             break;
1193                         ++stack.currentFrame->args.subjectPtr;
1194                     }
1195                     for(;;) {
1196                         RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1197                         if (isMatch)
1198                             RRETURN;
1199                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1200                             break;        /* Stop if tried at original pos */
1201                     }
1202                     RRETURN;
1203                 }
1204                 
1205                 /* Control never reaches here */
1206                 
1207             /* Match a single character, casefully */
1208                 
1209             BEGIN_OPCODE(CHAR):
1210                 stack.currentFrame->locals.length = 1;
1211                 stack.currentFrame->args.instructionPtr++;
1212                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1213                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1214                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1215                     RRETURN_NO_MATCH;
1216                 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
1217                     RRETURN_NO_MATCH;
1218                 NEXT_OPCODE;
1219                 
1220             /* Match a single character, caselessly */
1221                 
1222             BEGIN_OPCODE(CHAR_IGNORING_CASE): {
1223                 stack.currentFrame->locals.length = 1;
1224                 stack.currentFrame->args.instructionPtr++;
1225                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1226                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1227                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1228                     RRETURN_NO_MATCH;
1229                 int dc = *stack.currentFrame->args.subjectPtr++;
1230                 if (stack.currentFrame->locals.fc != dc && jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc)
1231                     RRETURN_NO_MATCH;
1232                 NEXT_OPCODE;
1233             }
1234                 
1235             /* Match a single ASCII character. */
1236                 
1237             BEGIN_OPCODE(ASCII_CHAR):
1238                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1239                     RRETURN_NO_MATCH;
1240                 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
1241                     RRETURN_NO_MATCH;
1242                 ++stack.currentFrame->args.subjectPtr;
1243                 stack.currentFrame->args.instructionPtr += 2;
1244                 NEXT_OPCODE;
1245                 
1246             /* Match one of two cases of an ASCII letter. */
1247                 
1248             BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
1249                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1250                     RRETURN_NO_MATCH;
1251                 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
1252                     RRETURN_NO_MATCH;
1253                 ++stack.currentFrame->args.subjectPtr;
1254                 stack.currentFrame->args.instructionPtr += 2;
1255                 NEXT_OPCODE;
1256                 
1257             /* Match a single character repeatedly; different opcodes share code. */
1258                 
1259             BEGIN_OPCODE(EXACT):
1260                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1261                 minimize = false;
1262                 stack.currentFrame->args.instructionPtr += 3;
1263                 goto REPEATCHAR;
1264                 
1265             BEGIN_OPCODE(UPTO):
1266             BEGIN_OPCODE(MINUPTO):
1267                 min = 0;
1268                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1269                 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
1270                 stack.currentFrame->args.instructionPtr += 3;
1271                 goto REPEATCHAR;
1272                 
1273             BEGIN_OPCODE(STAR):
1274             BEGIN_OPCODE(MINSTAR):
1275             BEGIN_OPCODE(PLUS):
1276             BEGIN_OPCODE(MINPLUS):
1277             BEGIN_OPCODE(QUERY):
1278             BEGIN_OPCODE(MINQUERY):
1279                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
1280                 
1281                 /* Common code for all repeated single-character matches. We can give
1282                  up quickly if there are fewer than the minimum number of characters left in
1283                  the subject. */
1284                 
1285             REPEATCHAR:
1286                 
1287                 stack.currentFrame->locals.length = 1;
1288                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1289                 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr)
1290                     RRETURN_NO_MATCH;
1291                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1292                 
1293                 if (stack.currentFrame->locals.fc <= 0xFFFF) {
1294                     othercase = md.ignoreCase ? jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
1295                     
1296                     for (int i = 1; i <= min; i++) {
1297                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1298                             RRETURN_NO_MATCH;
1299                         ++stack.currentFrame->args.subjectPtr;
1300                     }
1301                     
1302                     if (min == stack.currentFrame->locals.max)
1303                         NEXT_OPCODE;
1304                     
1305                     if (minimize) {
1306                         stack.currentFrame->locals.repeatOthercase = othercase;
1307                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1308                             RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1309                             if (isMatch)
1310                                 RRETURN;
1311                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1312                                 RRETURN;
1313                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase)
1314                                 RRETURN;
1315                             ++stack.currentFrame->args.subjectPtr;
1316                         }
1317                         /* Control never reaches here */
1318                     } else {
1319                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1320                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1321                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1322                                 break;
1323                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1324                                 break;
1325                             ++stack.currentFrame->args.subjectPtr;
1326                         }
1327                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1328                             RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1329                             if (isMatch)
1330                                 RRETURN;
1331                             --stack.currentFrame->args.subjectPtr;
1332                         }
1333                         RRETURN_NO_MATCH;
1334                     }
1335                     /* Control never reaches here */
1336                 } else {
1337                     /* No case on surrogate pairs, so no need to bother with "othercase". */
1338                     
1339                     for (int i = 1; i <= min; i++) {
1340                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1341                             RRETURN_NO_MATCH;
1342                         stack.currentFrame->args.subjectPtr += 2;
1343                     }
1344                     
1345                     if (min == stack.currentFrame->locals.max)
1346                         NEXT_OPCODE;
1347                     
1348                     if (minimize) {
1349                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1350                             RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1351                             if (isMatch)
1352                                 RRETURN;
1353                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1354                                 RRETURN;
1355                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1356                                 RRETURN;
1357                             stack.currentFrame->args.subjectPtr += 2;
1358                         }
1359                         /* Control never reaches here */
1360                     } else {
1361                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1362                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1363                             if (stack.currentFrame->args.subjectPtr > md.endSubject - 2)
1364                                 break;
1365                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1366                                 break;
1367                             stack.currentFrame->args.subjectPtr += 2;
1368                         }
1369                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1370                             RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1371                             if (isMatch)
1372                                 RRETURN;
1373                             stack.currentFrame->args.subjectPtr -= 2;
1374                         }
1375                         RRETURN_NO_MATCH;
1376                     }
1377                     /* Control never reaches here */
1378                 }
1379                 /* Control never reaches here */
1380                 
1381             /* Match a negated single one-byte character. */
1382                 
1383             BEGIN_OPCODE(NOT): {
1384                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1385                     RRETURN_NO_MATCH;
1386                 int b = stack.currentFrame->args.instructionPtr[1];
1387                 int c = *stack.currentFrame->args.subjectPtr++;
1388                 stack.currentFrame->args.instructionPtr += 2;
1389                 if (md.ignoreCase) {
1390                     if (c < 128)
1391                         c = toLowerCase(c);
1392                     if (toLowerCase(b) == c)
1393                         RRETURN_NO_MATCH;
1394                 } else {
1395                     if (b == c)
1396                         RRETURN_NO_MATCH;
1397                 }
1398                 NEXT_OPCODE;
1399             }
1400                 
1401             /* Match a negated single one-byte character repeatedly. This is almost a
1402              repeat of the code for a repeated single character, but I haven't found a
1403              nice way of commoning these up that doesn't require a test of the
1404              positive/negative option for each character match. Maybe that wouldn't add
1405              very much to the time taken, but character matching *is* what this is all
1406              about... */
1407                 
1408             BEGIN_OPCODE(NOTEXACT):
1409                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1410                 minimize = false;
1411                 stack.currentFrame->args.instructionPtr += 3;
1412                 goto REPEATNOTCHAR;
1413                 
1414             BEGIN_OPCODE(NOTUPTO):
1415             BEGIN_OPCODE(NOTMINUPTO):
1416                 min = 0;
1417                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1418                 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
1419                 stack.currentFrame->args.instructionPtr += 3;
1420                 goto REPEATNOTCHAR;
1421                 
1422             BEGIN_OPCODE(NOTSTAR):
1423             BEGIN_OPCODE(NOTMINSTAR):
1424             BEGIN_OPCODE(NOTPLUS):
1425             BEGIN_OPCODE(NOTMINPLUS):
1426             BEGIN_OPCODE(NOTQUERY):
1427             BEGIN_OPCODE(NOTMINQUERY):
1428                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
1429                 
1430             /* Common code for all repeated single-byte matches. We can give up quickly
1431              if there are fewer than the minimum number of bytes left in the
1432              subject. */
1433                 
1434             REPEATNOTCHAR:
1435                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1436                     RRETURN_NO_MATCH;
1437                 stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
1438                 
1439                 /* The code is duplicated for the caseless and caseful cases, for speed,
1440                  since matching characters is likely to be quite common. First, ensure the
1441                  minimum number of matches are present. If min = max, continue at the same
1442                  level without recursing. Otherwise, if minimizing, keep trying the rest of
1443                  the expression and advancing one matching character if failing, up to the
1444                  maximum. Alternatively, if maximizing, find the maximum number of
1445                  characters and work backwards. */
1446                 
1447                 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
1448                 
1449                 if (md.ignoreCase) {
1450                     if (stack.currentFrame->locals.fc < 128)
1451                         stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
1452                     
1453                     for (int i = 1; i <= min; i++) {
1454                         int d = *stack.currentFrame->args.subjectPtr++;
1455                         if (d < 128)
1456                             d = toLowerCase(d);
1457                         if (stack.currentFrame->locals.fc == d)
1458                             RRETURN_NO_MATCH;
1459                     }
1460                     
1461                     if (min == stack.currentFrame->locals.max)
1462                         NEXT_OPCODE;      
1463                     
1464                     if (minimize) {
1465                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1466                             RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1467                             if (isMatch)
1468                                 RRETURN;
1469                             int d = *stack.currentFrame->args.subjectPtr++;
1470                             if (d < 128)
1471                                 d = toLowerCase(d);
1472                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
1473                                 RRETURN;
1474                         }
1475                         /* Control never reaches here */
1476                     }
1477                     
1478                     /* Maximize case */
1479                     
1480                     else {
1481                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1482                         
1483                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1484                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1485                                 break;
1486                             int d = *stack.currentFrame->args.subjectPtr;
1487                             if (d < 128)
1488                                 d = toLowerCase(d);
1489                             if (stack.currentFrame->locals.fc == d)
1490                                 break;
1491                             ++stack.currentFrame->args.subjectPtr;
1492                         }
1493                         for (;;) {
1494                             RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1495                             if (isMatch)
1496                                 RRETURN;
1497                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1498                                 break;        /* Stop if tried at original pos */
1499                         }
1500                         
1501                         RRETURN;
1502                     }
1503                     /* Control never reaches here */
1504                 }
1505                 
1506                 /* Caseful comparisons */
1507                 
1508                 else {
1509                     for (int i = 1; i <= min; i++) {
1510                         int d = *stack.currentFrame->args.subjectPtr++;
1511                         if (stack.currentFrame->locals.fc == d)
1512                             RRETURN_NO_MATCH;
1513                     }
1514
1515                     if (min == stack.currentFrame->locals.max)
1516                         NEXT_OPCODE;
1517                     
1518                     if (minimize) {
1519                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1520                             RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1521                             if (isMatch)
1522                                 RRETURN;
1523                             int d = *stack.currentFrame->args.subjectPtr++;
1524                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
1525                                 RRETURN;
1526                         }
1527                         /* Control never reaches here */
1528                     }
1529                     
1530                     /* Maximize case */
1531                     
1532                     else {
1533                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1534                         
1535                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
1536                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1537                                 break;
1538                             int d = *stack.currentFrame->args.subjectPtr;
1539                             if (stack.currentFrame->locals.fc == d)
1540                                 break;
1541                             ++stack.currentFrame->args.subjectPtr;
1542                         }
1543                         for (;;) {
1544                             RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1545                             if (isMatch)
1546                                 RRETURN;
1547                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1548                                 break;        /* Stop if tried at original pos */
1549                         }
1550
1551                         RRETURN;
1552                     }
1553                 }
1554                 /* Control never reaches here */
1555                 
1556             /* Match a single character type repeatedly; several different opcodes
1557              share code. This is very similar to the code for single characters, but we
1558              repeat it in the interests of efficiency. */
1559                 
1560             BEGIN_OPCODE(TYPEEXACT):
1561                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1562                 minimize = true;
1563                 stack.currentFrame->args.instructionPtr += 3;
1564                 goto REPEATTYPE;
1565                 
1566             BEGIN_OPCODE(TYPEUPTO):
1567             BEGIN_OPCODE(TYPEMINUPTO):
1568                 min = 0;
1569                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1570                 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
1571                 stack.currentFrame->args.instructionPtr += 3;
1572                 goto REPEATTYPE;
1573                 
1574             BEGIN_OPCODE(TYPESTAR):
1575             BEGIN_OPCODE(TYPEMINSTAR):
1576             BEGIN_OPCODE(TYPEPLUS):
1577             BEGIN_OPCODE(TYPEMINPLUS):
1578             BEGIN_OPCODE(TYPEQUERY):
1579             BEGIN_OPCODE(TYPEMINQUERY):
1580                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
1581                 
1582                 /* Common code for all repeated single character type matches. Note that
1583                  in UTF-8 mode, '.' matches a character of any length, but for the other
1584                  character types, the valid characters are all one-byte long. */
1585                 
1586             REPEATTYPE:
1587                 stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++;      /* Code for the character type */
1588                 
1589                 /* First, ensure the minimum number of matches are present. Use inline
1590                  code for maximizing the speed, and do the type test once at the start
1591                  (i.e. keep it out of the loop). Also we can test that there are at least
1592                  the minimum number of characters before we start. */
1593                 
1594                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1595                     RRETURN_NO_MATCH;
1596                 if (min > 0) {
1597                     switch (stack.currentFrame->locals.ctype) {
1598                         case OP_NOT_NEWLINE:
1599                             for (int i = 1; i <= min; i++) {
1600                                 if (isNewline(*stack.currentFrame->args.subjectPtr))
1601                                     RRETURN_NO_MATCH;
1602                                 ++stack.currentFrame->args.subjectPtr;
1603                             }
1604                             break;
1605                             
1606                         case OP_NOT_DIGIT:
1607                             for (int i = 1; i <= min; i++) {
1608                                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1609                                     RRETURN_NO_MATCH;
1610                                 ++stack.currentFrame->args.subjectPtr;
1611                             }
1612                             break;
1613                             
1614                         case OP_DIGIT:
1615                             for (int i = 1; i <= min; i++) {
1616                                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1617                                     RRETURN_NO_MATCH;
1618                                 ++stack.currentFrame->args.subjectPtr;
1619                             }
1620                             break;
1621                             
1622                         case OP_NOT_WHITESPACE:
1623                             for (int i = 1; i <= min; i++) {
1624                                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
1625                                     RRETURN_NO_MATCH;
1626                                 ++stack.currentFrame->args.subjectPtr;
1627                             }
1628                             break;
1629                             
1630                         case OP_WHITESPACE:
1631                             for (int i = 1; i <= min; i++) {
1632                                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
1633                                     RRETURN_NO_MATCH;
1634                                 ++stack.currentFrame->args.subjectPtr;
1635                             }
1636                             break;
1637                             
1638                         case OP_NOT_WORDCHAR:
1639                             for (int i = 1; i <= min; i++) {
1640                                 if (isWordChar(*stack.currentFrame->args.subjectPtr))
1641                                     RRETURN_NO_MATCH;
1642                                 ++stack.currentFrame->args.subjectPtr;
1643                             }
1644                             break;
1645                             
1646                         case OP_WORDCHAR:
1647                             for (int i = 1; i <= min; i++) {
1648                                 if (!isWordChar(*stack.currentFrame->args.subjectPtr))
1649                                     RRETURN_NO_MATCH;
1650                                 ++stack.currentFrame->args.subjectPtr;
1651                             }
1652                             break;
1653                             
1654                         default:
1655                             JS_NOT_REACHED("Invalid character type.");
1656                             return matchError(JSRegExpErrorInternal, stack);
1657                     }  /* End switch(stack.currentFrame->locals.ctype) */
1658                 }
1659                 
1660                 /* If min = max, continue at the same level without recursing */
1661                 
1662                 if (min == stack.currentFrame->locals.max)
1663                     NEXT_OPCODE;    
1664                 
1665                 /* If minimizing, we have to test the rest of the pattern before each
1666                  subsequent match. */
1667                 
1668                 if (minimize) {
1669                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1670                         RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1671                         if (isMatch)
1672                             RRETURN;
1673                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1674                             RRETURN;
1675                         
1676                         int c = *stack.currentFrame->args.subjectPtr++;
1677                         switch (stack.currentFrame->locals.ctype) {
1678                             case OP_NOT_NEWLINE:
1679                                 if (isNewline(c))
1680                                     RRETURN;
1681                                 break;
1682                                 
1683                             case OP_NOT_DIGIT:
1684                                 if (isASCIIDigit(c))
1685                                     RRETURN;
1686                                 break;
1687                                 
1688                             case OP_DIGIT:
1689                                 if (!isASCIIDigit(c))
1690                                     RRETURN;
1691                                 break;
1692                                 
1693                             case OP_NOT_WHITESPACE:
1694                                 if (isSpaceChar(c))
1695                                     RRETURN;
1696                                 break;
1697                                 
1698                             case OP_WHITESPACE:
1699                                 if (!isSpaceChar(c))
1700                                     RRETURN;
1701                                 break;
1702                                 
1703                             case OP_NOT_WORDCHAR:
1704                                 if (isWordChar(c))
1705                                     RRETURN;
1706                                 break;
1707                                 
1708                             case OP_WORDCHAR:
1709                                 if (!isWordChar(c))
1710                                     RRETURN;
1711                                 break;
1712                                 
1713                             default:
1714                                 JS_NOT_REACHED("Invalid character type.");
1715                                 return matchError(JSRegExpErrorInternal, stack);
1716                         }
1717                     }
1718                     /* Control never reaches here */
1719                 }
1720                 
1721                 /* If maximizing it is worth using inline code for speed, doing the type
1722                  test once at the start (i.e. keep it out of the loop). */
1723                 
1724                 else {
1725                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;  /* Remember where we started */
1726                     
1727                     switch (stack.currentFrame->locals.ctype) {
1728                         case OP_NOT_NEWLINE:
1729                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1730                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject || isNewline(*stack.currentFrame->args.subjectPtr))
1731                                     break;
1732                                 stack.currentFrame->args.subjectPtr++;
1733                             }
1734                             break;
1735                             
1736                         case OP_NOT_DIGIT:
1737                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1738                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1739                                     break;
1740                                 int c = *stack.currentFrame->args.subjectPtr;
1741                                 if (isASCIIDigit(c))
1742                                     break;
1743                                 ++stack.currentFrame->args.subjectPtr;
1744                             }
1745                             break;
1746                             
1747                         case OP_DIGIT:
1748                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1749                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1750                                     break;
1751                                 int c = *stack.currentFrame->args.subjectPtr;
1752                                 if (!isASCIIDigit(c))
1753                                     break;
1754                                 ++stack.currentFrame->args.subjectPtr;
1755                             }
1756                             break;
1757                             
1758                         case OP_NOT_WHITESPACE:
1759                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1760                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1761                                     break;
1762                                 int c = *stack.currentFrame->args.subjectPtr;
1763                                 if (isSpaceChar(c))
1764                                     break;
1765                                 ++stack.currentFrame->args.subjectPtr;
1766                             }
1767                             break;
1768                             
1769                         case OP_WHITESPACE:
1770                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1771                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1772                                     break;
1773                                 int c = *stack.currentFrame->args.subjectPtr;
1774                                 if (!isSpaceChar(c))
1775                                     break;
1776                                 ++stack.currentFrame->args.subjectPtr;
1777                             }
1778                             break;
1779                             
1780                         case OP_NOT_WORDCHAR:
1781                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1782                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1783                                     break;
1784                                 int c = *stack.currentFrame->args.subjectPtr;
1785                                 if (isWordChar(c))
1786                                     break;
1787                                 ++stack.currentFrame->args.subjectPtr;
1788                             }
1789                             break;
1790                             
1791                         case OP_WORDCHAR:
1792                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
1793                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1794                                     break;
1795                                 int c = *stack.currentFrame->args.subjectPtr;
1796                                 if (!isWordChar(c))
1797                                     break;
1798                                 ++stack.currentFrame->args.subjectPtr;
1799                             }
1800                             break;
1801                             
1802                         default:
1803                             JS_NOT_REACHED("Invalid character type.");
1804                             return matchError(JSRegExpErrorInternal, stack);
1805                     }
1806                     
1807                     /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
1808                     
1809                     for (;;) {
1810                         RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1811                         if (isMatch)
1812                             RRETURN;
1813                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1814                             break;        /* Stop if tried at original pos */
1815                     }
1816                     
1817                     /* Get here if we can't make it match with any permitted repetitions */
1818                     
1819                     RRETURN;
1820                 }
1821                 /* Control never reaches here */
1822                 
1823             BEGIN_OPCODE(CRMINPLUS):
1824             BEGIN_OPCODE(CRMINQUERY):
1825             BEGIN_OPCODE(CRMINRANGE):
1826             BEGIN_OPCODE(CRMINSTAR):
1827             BEGIN_OPCODE(CRPLUS):
1828             BEGIN_OPCODE(CRQUERY):
1829             BEGIN_OPCODE(CRRANGE):
1830             BEGIN_OPCODE(CRSTAR):
1831                 JS_NOT_REACHED("Invalid opcode.");
1832                 return matchError(JSRegExpErrorInternal, stack);
1833                 
1834 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1835             CAPTURING_BRACKET:
1836 #else
1837             default:
1838 #endif
1839                 /* Opening capturing bracket. If there is space in the offset vector, save
1840                  the current subject position in the working slot at the top of the vector. We
1841                  mustn't change the current values of the data slot, because they may be set
1842                  from a previous iteration of this group, and be referred to by a reference
1843                  inside the group.
1844                  
1845                  If the bracket fails to match, we need to restore this value and also the
1846                  values of the final offsets, in case they were set by a previous iteration of
1847                  the same bracket.
1848                  
1849                  If there isn't enough space in the offset vector, treat this as if it were a
1850                  non-capturing bracket. Don't worry about setting the flag for the error case
1851                  here; that is handled in the code for KET. */
1852                 
1853                 JS_ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
1854                 
1855                 LOCALS(number) = *stack.currentFrame->args.instructionPtr - OP_BRA;
1856                 stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr);
1857                 DPRINTF(("opening capturing bracket %d\n", stack.currentFrame->locals.number));
1858                 
1859                 /* For extended extraction brackets (large number), we have to fish out the
1860                  number from a dummy opcode at the start. */
1861                 
1862                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
1863                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 4 + LINK_SIZE);
1864                 stack.currentFrame->locals.offset = 2 * stack.currentFrame->locals.number;
1865
1866                 JS_ASSERT_IF(LOCALS(number), LOCALS(minBracket) <= LOCALS(number) && LOCALS(number) < LOCALS(limitBracket));
1867                 
1868                 if (stack.currentFrame->locals.offset < md.offsetMax) {
1869                     stack.currentFrame->locals.savedSubjectOffset = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
1870                     DPRINTF(("setting subject offset for bracket to %d\n", stack.currentFrame->args.subjectPtr - md.startSubject));
1871                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
1872                     stack.currentFrame->locals.skipBytes = 3; /* For OP_BRAs. */
1873                     
1874                     /* We must compute this value at the top, before we move the instruction pointer. */
1875                     stack.currentFrame->locals.minSatisfied = minSatNextBracket.readAndClear();
1876                     do {
1877                         /* We need to extract this into a variable so we can correctly pass it by value
1878                          through RECURSIVE_MATCH_NEW_GROUP, which modifies currentFrame. */
1879                         minSatisfied = stack.currentFrame->locals.minSatisfied;
1880                         RECURSIVE_MATCH_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, stack.currentFrame->args.bracketChain, minSatisfied);
1881                         if (isMatch)
1882                             RRETURN;
1883                         stack.currentFrame->locals.skipBytes = 1; /* For OP_ALTs. */
1884                         stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
1885                     } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
1886                     
1887                     DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
1888                     for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i)
1889                         md.setOffsetPair(i, -1, -1);
1890                     DPRINTF(("restoring subject offset for bracket to %d\n", stack.currentFrame->locals.savedSubjectOffset));
1891                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.savedSubjectOffset;
1892                     
1893                     RRETURN;
1894                 }
1895                 
1896                 /* Insufficient room for saving captured contents */
1897                 
1898                 goto NON_CAPTURING_BRACKET;
1899         }
1900         
1901         /* Do not stick any code in here without much thought; it is assumed
1902          that "continue" in the code above comes out to here to repeat the main
1903          loop. */
1904         
1905     } /* End of main loop */
1906     
1907     JS_NOT_REACHED("Loop does not fallthru.");
1908     
1909 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1910     
1911 RRETURN_SWITCH:
1912     switch (stack.currentFrame->returnLocation) {
1913         case 0: goto RETURN;
1914         case 1: goto RRETURN_1;
1915         case 2: goto RRETURN_2;
1916         case 6: goto RRETURN_6;
1917         case 7: goto RRETURN_7;
1918         case 14: goto RRETURN_14;
1919         case 15: goto RRETURN_15;
1920         case 16: goto RRETURN_16;
1921         case 17: goto RRETURN_17;
1922         case 18: goto RRETURN_18;
1923         case 19: goto RRETURN_19;
1924         case 20: goto RRETURN_20;
1925         case 21: goto RRETURN_21;
1926         case 22: goto RRETURN_22;
1927         case 24: goto RRETURN_24;
1928         case 26: goto RRETURN_26;
1929         case 27: goto RRETURN_27;
1930         case 28: goto RRETURN_28;
1931         case 29: goto RRETURN_29;
1932         case 30: goto RRETURN_30;
1933         case 31: goto RRETURN_31;
1934         case 38: goto RRETURN_38;
1935         case 40: goto RRETURN_40;
1936         case 42: goto RRETURN_42;
1937         case 44: goto RRETURN_44;
1938         case 48: goto RRETURN_48;
1939         case 52: goto RRETURN_52;
1940     }
1941     
1942     JS_NOT_REACHED("Bad computed return location.");
1943     return matchError(JSRegExpErrorInternal, stack);
1944     
1945 #endif
1946     
1947 RETURN:
1948     return isMatch;
1949 }
1950
1951
1952 /*************************************************
1953 *         Execute a Regular Expression           *
1954 *************************************************/
1955
1956 /* This function applies a compiled re to a subject string and picks out
1957 portions of the string if it matches. Two elements in the vector are set for
1958 each substring: the offsets to the start and end of the substring.
1959
1960 Arguments:
1961   re              points to the compiled expression
1962   extra_data      points to extra data or is NULL
1963   subject         points to the subject string
1964   length          length of subject string (may contain binary zeros)
1965   start_offset    where to start in the subject string
1966   options         option bits
1967   offsets         points to a vector of ints to be filled in with offsets
1968   offsetCount     the number of elements in the vector
1969
1970 Returns:          > 0 => success; value is the number of elements filled in
1971                   = 0 => success, but offsets is not big enough
1972                    -1 => failed to match
1973                  < -1 => some kind of unexpected problem
1974 */
1975
1976 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int firstByte, bool firstByteIsCaseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
1977 {
1978     // If firstByte is set, try scanning to the first instance of that byte
1979     // no need to try and match against any earlier part of the subject string.
1980     if (firstByte >= 0) {
1981         UChar firstChar = firstByte;
1982         if (firstByteIsCaseless)
1983             while (subjectPtr < endSubject) {
1984                 int c = *subjectPtr;
1985                 if (c > 127)
1986                     break;
1987                 if (toLowerCase(c) == firstChar)
1988                     break;
1989                 subjectPtr++;
1990             }
1991         else {
1992             while (subjectPtr < endSubject && *subjectPtr != firstChar)
1993                 subjectPtr++;
1994         }
1995     } else if (useMultiLineFirstCharOptimization) {
1996         /* Or to just after \n for a multiline match if possible */
1997         // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
1998         if (subjectPtr > originalSubjectStart) {
1999             while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
2000                 subjectPtr++;
2001         }
2002     }
2003 }
2004
2005 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int reqByte, int reqByte2, bool reqByteIsCaseless, bool hasFirstByte, const UChar*& reqBytePtr)
2006 {
2007     /* If reqByte is set, we know that that character must appear in the subject
2008      for the match to succeed. If the first character is set, reqByte must be
2009      later in the subject; otherwise the test starts at the match point. This
2010      optimization can save a huge amount of backtracking in patterns with nested
2011      unlimited repeats that aren't going to match. Writing separate code for
2012      cased/caseless versions makes it go faster, as does using an autoincrement
2013      and backing off on a match.
2014      
2015      HOWEVER: when the subject string is very, very long, searching to its end can
2016      take a long time, and give bad performance on quite ordinary patterns. This
2017      showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2018      don't do this when the string is sufficiently long.
2019     */
2020
2021     if (reqByte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
2022         const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
2023
2024         /* We don't need to repeat the search if we haven't yet reached the
2025          place we found it at last time. */
2026
2027         if (p > reqBytePtr) {
2028             if (reqByteIsCaseless) {
2029                 while (p < endSubject) {
2030                     int pp = *p++;
2031                     if (pp == reqByte || pp == reqByte2) {
2032                         p--;
2033                         break;
2034                     }
2035                 }
2036             } else {
2037                 while (p < endSubject) {
2038                     if (*p++ == reqByte) {
2039                         p--;
2040                         break;
2041                     }
2042                 }
2043             }
2044
2045             /* If we can't find the required character, break the matching loop */
2046
2047             if (p >= endSubject)
2048                 return true;
2049
2050             /* If we have found the required character, save the point where we
2051              found it, so that we don't search again next time round the loop if
2052              the start hasn't passed this character yet. */
2053
2054             reqBytePtr = p;
2055         }
2056     }
2057     return false;
2058 }
2059
2060 int jsRegExpExecute(JSContext *cx, const JSRegExp* re,
2061                     const UChar* subject, int length, int start_offset, int* offsets,
2062                     int offsetCount)
2063 {
2064     JS_ASSERT(re);
2065     JS_ASSERT(subject || !length);
2066     JS_ASSERT(offsetCount >= 0);
2067     JS_ASSERT(offsets || offsetCount == 0);
2068
2069     MatchData matchBlock;
2070     matchBlock.startSubject = subject;
2071     matchBlock.endSubject = matchBlock.startSubject + length;
2072     const UChar* endSubject = matchBlock.endSubject;
2073     
2074     matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
2075     matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
2076     
2077     /* Use the vector supplied, rounding down its size to a multiple of 3. */
2078     int ocount = offsetCount - (offsetCount % 3);
2079     
2080     matchBlock.offsetVector = offsets;
2081     matchBlock.offsetEnd = ocount;
2082     matchBlock.offsetMax = (2*ocount)/3;
2083     matchBlock.offsetOverflow = false;
2084     
2085     /* Compute the minimum number of offsets that we need to reset each time. Doing
2086      this makes a huge difference to execution time when there aren't many brackets
2087      in the pattern. */
2088     
2089     int resetCount = 2 + re->topBracket * 2;
2090     if (resetCount > offsetCount)
2091         resetCount = ocount;
2092     
2093     /* Reset the working variable associated with each extraction. These should
2094      never be used unless previously set, but they get saved and restored, and so we
2095      initialize them to avoid reading uninitialized locations. */
2096     
2097     if (matchBlock.offsetVector) {
2098         int* iptr = matchBlock.offsetVector + ocount;
2099         int* iend = iptr - resetCount/2 + 1;
2100         while (--iptr >= iend)
2101             *iptr = -1;
2102     }
2103     
2104     /* Set up the first character to match, if available. The firstByte value is
2105      never set for an anchored regular expression, but the anchoring may be forced
2106      at run time, so we have to test for anchoring. The first char may be unset for
2107      an unanchored pattern, of course. If there's no first char and the pattern was
2108      studied, there may be a bitmap of possible first characters. */
2109     
2110     bool firstByteIsCaseless = false;
2111     int firstByte = -1;
2112     if (re->options & UseFirstByteOptimizationOption) {
2113         firstByte = re->firstByte & 255;
2114         if ((firstByteIsCaseless = (re->firstByte & REQ_IGNORE_CASE)))
2115             firstByte = toLowerCase(firstByte);
2116     }
2117     
2118     /* For anchored or unanchored matches, there may be a "last known required
2119      character" set. */
2120     
2121     bool reqByteIsCaseless = false;
2122     int reqByte = -1;
2123     int reqByte2 = -1;
2124     if (re->options & UseRequiredByteOptimizationOption) {
2125         reqByte = re->reqByte & 255;
2126         reqByteIsCaseless = (re->reqByte & REQ_IGNORE_CASE);
2127         reqByte2 = flipCase(reqByte);
2128     }
2129     
2130     /* Loop for handling unanchored repeated matching attempts; for anchored regexs
2131      the loop runs just once. */
2132     
2133     const UChar* startMatch = subject + start_offset;
2134     const UChar* reqBytePtr = startMatch - 1;
2135     bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
2136     
2137     do {
2138         /* Reset the maximum number of extractions we might see. */
2139         if (matchBlock.offsetVector) {
2140             int* iptr = matchBlock.offsetVector;
2141             int* iend = iptr + resetCount;
2142             while (iptr < iend)
2143                 *iptr++ = -1;
2144         }
2145         
2146         tryFirstByteOptimization(startMatch, endSubject, firstByte, firstByteIsCaseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
2147         if (tryRequiredByteOptimization(startMatch, endSubject, reqByte, reqByte2, reqByteIsCaseless, firstByte >= 0, reqBytePtr))
2148             break;
2149                 
2150         /* When a match occurs, substrings will be set for all internal extractions;
2151          we just need to set up the whole thing as substring 0 before returning. If
2152          there were too many extractions, set the return code to zero. In the case
2153          where we had to get some local store to hold offsets for backreferences, copy
2154          those back references that we can. In this case there need not be overflow
2155          if certain parts of the pattern were not used. */
2156         
2157         /* The code starts after the JSRegExp block and the capture name table. */
2158         const unsigned char* start_code = (const unsigned char*)(re + 1);
2159         
2160         int returnCode = match(&cx->regExpPool, startMatch, start_code, 2, matchBlock);
2161         
2162         /* When the result is no match, advance the pointer to the next character
2163          and continue. */
2164         if (returnCode == 0) {
2165             startMatch++;
2166             continue;
2167         }
2168
2169         if (returnCode != 1) {
2170             JS_ASSERT(returnCode == JSRegExpErrorHitLimit);
2171             DPRINTF((">>>> error: returning %d\n", returnCode));
2172             return returnCode;
2173         }
2174         
2175         /* We have a match! */
2176         
2177         returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
2178         
2179         if (offsetCount < 2)
2180             returnCode = 0;
2181         else {
2182             offsets[0] = startMatch - matchBlock.startSubject;
2183             offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
2184         }
2185         
2186         JS_ASSERT(returnCode >= 0);
2187         DPRINTF((">>>> returning %d\n", returnCode));
2188         return returnCode;
2189     } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
2190     
2191     DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2192     return JSRegExpErrorNoMatch;
2193 }