1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
7 Originally written by Philip Hazel
8 Copyright (c) 1997-2006 University of Cambridge
9 Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
10 Copyright (C) 2007 Eric Seidel <eric@webkit.org>
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
41 /* This module contains jsRegExpExecute(), the externally visible function
42 that does pattern matching using an NFA algorithm, following the rules from
43 the JavaScript specification. There are also some supporting functions. */
45 #include "pcre_internal.h"
48 #include "yarr/jswtfbridge.h"
49 #include "yarr/wtf/ASCIICType.h"
55 #if !WTF_COMPILER_MSVC && !WTF_COMPILER_SUNPRO
56 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
59 /* Note: Webkit sources have USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP disabled. */
60 /* Note: There are hardcoded constants all over the place, but in the port of
61 Yarr to TraceMonkey two bytes are added to the OP_BRA* opcodes, so the
62 instruction stream now looks like this at the start of a bracket group:
64 OP_BRA* [link:LINK_SIZE] [minNestedBracket,maxNestedBracket:2]
66 Both capturing and non-capturing brackets encode this information. */
68 /* Avoid warnings on Windows. */
72 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
73 typedef int ReturnLocation;
75 typedef void* ReturnLocation;
78 /* Node on a stack of brackets. This is used to detect and reject
79 matches of the empty string per ECMAScript repeat match rules. This
80 also prevents infinite loops on quantified empty matches. One node
81 represents the start state at the start of this bracket group. */
82 struct BracketChainNode {
83 BracketChainNode* previousBracket;
84 const UChar* bracketStart;
85 /* True if the minimum number of matches was already satisfied
86 when we started matching this group. */
91 ReturnLocation returnLocation;
92 struct MatchFrame* previousFrame;
94 /* The frame allocates saved offsets into the regular expression arena pool so
95 that they can be restored during backtracking. */
96 size_t savedOffsetsSize;
97 JSArenaPool *regExpPool;
99 MatchFrame() : savedOffsetsSize(0), regExpPool(0) {}
100 void init(JSArenaPool *regExpPool) { this->regExpPool = regExpPool; }
102 /* Function arguments that may change */
104 const UChar* subjectPtr;
105 const unsigned char* instructionPtr;
107 BracketChainNode* bracketChain;
111 /* PCRE uses "fake" recursion built off of gotos, thus
112 stack-based local variables are not safe to use. Instead we have to
113 store local variables on the current MatchFrame. */
115 const unsigned char* data;
116 const unsigned char* startOfRepeatingBracket;
117 const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
118 const unsigned char* instructionPtrAtStartOfOnce;
121 int savedSubjectOffset;
136 BracketChainNode bracketChainNode;
139 void saveOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) {
140 JS_ASSERT(regExpPool);
141 JS_ASSERT(minBracket >= 0);
142 JS_ASSERT(limitBracket >= minBracket);
143 JS_ASSERT(offsetEnd >= 0);
144 if (minBracket == limitBracket)
146 const size_t newSavedOffsetCount = 3 * (limitBracket - minBracket);
147 /* Increase saved offset space if necessary. */
149 size_t targetSize = sizeof(*savedOffsets) * newSavedOffsetCount;
150 if (savedOffsetsSize < targetSize) {
151 JS_ARENA_ALLOCATE_CAST(savedOffsets, int *, regExpPool, targetSize);
152 JS_ASSERT(savedOffsets); /* FIXME: error code, bug 574459. */
153 savedOffsetsSize = targetSize;
156 for (unsigned i = 0; i < unsigned(limitBracket - minBracket); ++i) {
157 int bracketIter = minBracket + i;
158 JS_ASSERT(2 * bracketIter + 1 <= offsetEnd);
159 int start = offsets[2 * bracketIter];
160 int end = offsets[2 * bracketIter + 1];
161 JS_ASSERT(bracketIter <= offsetEnd);
162 int offset = offsets[offsetEnd - bracketIter];
163 DPRINTF(("saving bracket %d; start: %d; end: %d; offset: %d\n", bracketIter, start, end, offset));
164 JS_ASSERT(start <= end);
165 JS_ASSERT(i * 3 + 2 < newSavedOffsetCount);
166 savedOffsets[i * 3 + 0] = start;
167 savedOffsets[i * 3 + 1] = end;
168 savedOffsets[i * 3 + 2] = offset;
172 void clobberOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) {
173 for (int i = 0; i < limitBracket - minBracket; ++i) {
174 int bracketIter = minBracket + i;
175 JS_ASSERT(2 * bracketIter + 1 < offsetEnd);
176 offsets[2 * bracketIter + 0] = -1;
177 offsets[2 * bracketIter + 1] = -1;
181 void restoreOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) {
182 JS_ASSERT(regExpPool);
183 JS_ASSERT_IF(limitBracket > minBracket, savedOffsets);
184 for (int i = 0; i < limitBracket - minBracket; ++i) {
185 int bracketIter = minBracket + i;
186 int start = savedOffsets[i * 3 + 0];
187 int end = savedOffsets[i * 3 + 1];
188 int offset = savedOffsets[i * 3 + 2];
189 DPRINTF(("restoring bracket %d; start: %d; end: %d; offset: %d\n", bracketIter, start, end, offset));
190 JS_ASSERT(start <= end);
191 offsets[2 * bracketIter + 0] = start;
192 offsets[2 * bracketIter + 1] = end;
193 offsets[offsetEnd - bracketIter] = offset;
197 /* Extract the bracket data after the current opcode/link at |instructionPtr| into the locals. */
198 void extractBrackets(const unsigned char *instructionPtr) {
199 uint16 bracketMess = get2ByteValue(instructionPtr + 1 + LINK_SIZE);
200 locals.minBracket = (bracketMess >> 8) & 0xff;
201 locals.limitBracket = (bracketMess & 0xff);
202 JS_ASSERT(locals.minBracket <= locals.limitBracket);
205 /* At the start of a bracketed group, add the current subject pointer to the
206 stack of such pointers, to be re-instated at the end of the group when we hit
207 the closing ket. When match() is called in other circumstances, we don't add to
209 void startNewGroup(bool minSatisfied) {
210 locals.bracketChainNode.previousBracket = args.bracketChain;
211 locals.bracketChainNode.bracketStart = args.subjectPtr;
212 locals.bracketChainNode.minSatisfied = minSatisfied;
213 args.bracketChain = &locals.bracketChainNode;
217 /* Structure for passing "static" information around between the functions
218 doing traditional NFA matching, so that they are thread-safe. */
221 int *offsetVector; /* Offset vector */
222 int offsetEnd; /* One past the end */
223 int offsetMax; /* The maximum usable for return data */
224 bool offsetOverflow; /* Set if too many extractions */
225 const UChar *startSubject; /* Start of the subject string */
226 const UChar *endSubject; /* End of the subject string */
227 const UChar *endMatchPtr; /* Subject position at end match */
228 int endOffsetTop; /* Highwater mark at end of match */
232 void setOffsetPair(size_t pairNum, int start, int end) {
233 JS_ASSERT(int(2 * pairNum + 1) < offsetEnd && int(pairNum) < offsetEnd);
234 JS_ASSERT(start <= end);
235 JS_ASSERT_IF(start < 0, start == end && start == -1);
236 DPRINTF(("setting offset pair at %u (%d, %d)\n", pairNum, start, end));
237 offsetVector[2 * pairNum + 0] = start;
238 offsetVector[2 * pairNum + 1] = end;
242 /* The maximum remaining length of subject we are prepared to search for a
245 #define REQ_BYTE_MAX 1000
247 /* The below limit restricts the number of "recursive" match calls in order to
248 avoid spending exponential time on complex regular expressions. */
250 static const unsigned matchLimit = 1000000;
252 /*************************************************
253 * Match a back-reference *
254 *************************************************/
256 /* If a back reference hasn't been set, the length that is passed is greater
257 than the number of characters left in the string, so the match fails.
260 offset index into the offset vector
261 subjectPtr points into the subject
262 length length to be matched
263 md points to match data block
265 Returns: true if matched
268 static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
270 const UChar* p = md.startSubject + md.offsetVector[offset];
272 /* Always fail if not enough characters left */
274 if (length > md.endSubject - subjectPtr)
277 /* Separate the caselesss case for speed */
280 while (length-- > 0) {
282 int othercase = jsc_pcre_ucp_othercase(c);
283 UChar d = *subjectPtr++;
284 if (c != d && othercase != d)
290 if (*p++ != *subjectPtr++)
297 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
299 /* Use numbered labels and switch statement at the bottom of the match function. */
301 #define RMATCH_WHERE(num) num
302 #define RRETURN_LABEL RRETURN_SWITCH
306 /* Use GCC's computed goto extension. */
308 /* For one test case this is more than 40% faster than the switch statement.
309 We could avoid the use of the num argument entirely by using local labels,
310 but using it for the GCC case as well as the non-GCC case allows us to share
311 a bit more code and notice if we use conflicting numbers.*/
313 #define RMATCH_WHERE(num) JS_EXTENSION(&&RRETURN_##num)
314 #define RRETURN_LABEL *stack.currentFrame->returnLocation
318 #define RECURSIVE_MATCH_COMMON(num) \
321 stack.popCurrentFrame();
323 #define RECURSIVE_MATCH(num, ra, rb) \
325 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
326 RECURSIVE_MATCH_COMMON(num) \
329 #define RECURSIVE_MATCH_NEW_GROUP(num, ra, rb, gm) \
331 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
332 stack.currentFrame->startNewGroup(gm); \
333 RECURSIVE_MATCH_COMMON(num) \
336 #define RRETURN do { JS_EXTENSION_(goto RRETURN_LABEL); } while (0)
338 #define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
340 /*************************************************
341 * Match from current position *
342 *************************************************/
344 /* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
345 in the subject string, while substringStart holds the value of subjectPtr at the start of the
346 last bracketed group - used for breaking infinite loops matching zero-length
347 strings. This function is called recursively in many circumstances. Whenever it
348 returns a negative (error) response, the outer match() call must also return the
352 subjectPtr pointer in subject
353 instructionPtr position in code
354 offsetTop current top pointer
355 md pointer to "static" info for the match
357 Returns: 1 if matched ) these values are >= 0
358 0 if failed to match )
359 a negative error value if aborted by an error condition
360 (e.g. stopped by repeated call or recursion limit)
363 static const unsigned numFramesOnStack = 16;
366 JSArenaPool *regExpPool;
367 void *regExpPoolMark;
369 MatchStack(JSArenaPool *regExpPool)
370 : regExpPool(regExpPool)
371 , regExpPoolMark(JS_ARENA_MARK(regExpPool))
372 , framesEnd(frames + numFramesOnStack)
373 , currentFrame(frames)
374 , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
376 JS_ASSERT((sizeof(frames) / sizeof(frames[0])) == numFramesOnStack);
377 JS_ASSERT(regExpPool);
378 for (size_t i = 0; i < numFramesOnStack; ++i)
379 frames[i].init(regExpPool);
382 ~MatchStack() { JS_ARENA_RELEASE(regExpPool, regExpPoolMark); }
384 MatchFrame frames[numFramesOnStack];
385 MatchFrame* framesEnd;
386 MatchFrame* currentFrame;
389 bool canUseStackBufferForNextFrame() {
390 return size < numFramesOnStack;
393 MatchFrame* allocateNextFrame() {
394 if (canUseStackBufferForNextFrame())
395 return currentFrame + 1;
396 // FIXME: bug 574459 -- no NULL check
397 MatchFrame *frame = js_new<MatchFrame>();
398 frame->init(regExpPool);
402 void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation) {
403 MatchFrame* newframe = allocateNextFrame();
404 newframe->previousFrame = currentFrame;
406 newframe->args.subjectPtr = currentFrame->args.subjectPtr;
407 newframe->args.offsetTop = currentFrame->args.offsetTop;
408 newframe->args.instructionPtr = instructionPtr;
409 newframe->args.bracketChain = bracketChain;
410 newframe->returnLocation = returnLocation;
413 currentFrame = newframe;
416 void popCurrentFrame() {
417 MatchFrame* oldFrame = currentFrame;
418 currentFrame = currentFrame->previousFrame;
419 if (size > numFramesOnStack)
424 void popAllFrames() {
430 static int matchError(int errorCode, MatchStack& stack)
432 stack.popAllFrames();
436 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
437 if there are extra bytes. This is called when we know we are in UTF-8 mode. */
439 static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len)
442 if ((c & 0xc0) == 0xc0) {
443 int gcaa = jsc_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
445 c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss;
446 for (int gcii = 1; gcii <= gcaa; gcii++) {
448 c |= (subjectPtr[gcii] & 0x3f) << gcss;
454 static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
456 // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
457 static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
458 static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
460 JS_ASSERT(instructionOffset >= 0);
461 JS_ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
463 minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
464 minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
465 maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
468 /* Helper class for passing a flag value from one op to the next that runs.
469 This allows us to set the flag in certain ops. When the flag is read, it
470 will be true only if the previous op set the flag, otherwise it is false. */
473 LinearFlag() : flag(false) {}
475 bool readAndClear() {
490 match(JSArenaPool *regExpPool, const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
492 bool isMatch = false;
494 bool minimize = false; /* Initialization not really needed, but some compilers think so. */
495 unsigned remainingMatchCount = matchLimit;
496 int othercase; /* Declare here to avoid errors during jumps */
499 MatchStack stack(regExpPool);
500 LinearFlag minSatNextBracket;
502 /* The opcode jump table. */
503 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
504 #define EMIT_JUMP_TABLE_ENTRY(opcode) JS_EXTENSION(&&LABEL_OP_##opcode)
505 static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
506 #undef EMIT_JUMP_TABLE_ENTRY
509 /* One-time setup of the opcode jump table. */
510 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
511 for (int i = 255; !opcodeJumpTable[i]; i--)
512 opcodeJumpTable[i] = &&CAPTURING_BRACKET;
515 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
516 // Shark shows this as a hot line
517 // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
518 stack.currentFrame->returnLocation = JS_EXTENSION(&&RETURN);
520 stack.currentFrame->returnLocation = 0;
522 stack.currentFrame->args.subjectPtr = subjectPtr;
523 stack.currentFrame->args.instructionPtr = instructionPtr;
524 stack.currentFrame->args.offsetTop = offsetTop;
525 stack.currentFrame->args.bracketChain = 0;
526 stack.currentFrame->startNewGroup(false);
528 /* This is where control jumps back to to effect "recursion" */
531 if (!--remainingMatchCount)
532 return matchError(JSRegExpErrorHitLimit, stack);
534 /* Now start processing the operations. */
536 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
541 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
542 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
543 #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
545 #define BEGIN_OPCODE(opcode) case OP_##opcode
546 #define NEXT_OPCODE continue
548 #define LOCALS(__ident) (stack.currentFrame->locals.__ident)
550 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
553 switch (*stack.currentFrame->args.instructionPtr)
556 /* Non-capturing bracket: optimized */
559 NON_CAPTURING_BRACKET:
560 DPRINTF(("start non-capturing bracket\n"));
561 stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr);
562 /* If we see no ALT, we have to skip three bytes of bracket data (link plus nested
564 stack.currentFrame->locals.skipBytes = 3;
565 /* We must compute this value at the top, before we move the instruction pointer. */
566 stack.currentFrame->locals.minSatisfied = minSatNextBracket.readAndClear();
568 /* We need to extract this into a variable so we can correctly pass it by value
569 through RECURSIVE_MATCH_NEW_GROUP, which modifies currentFrame. */
570 minSatisfied = stack.currentFrame->locals.minSatisfied;
571 RECURSIVE_MATCH_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, stack.currentFrame->args.bracketChain, minSatisfied);
573 DPRINTF(("non-capturing bracket succeeded\n"));
576 stack.currentFrame->locals.skipBytes = 1;
577 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
578 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
579 DPRINTF(("non-capturing bracket failed\n"));
580 for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i)
581 md.setOffsetPair(i, -1, -1);
584 /* Skip over large extraction number data if encountered. */
586 BEGIN_OPCODE(BRANUMBER):
587 stack.currentFrame->args.instructionPtr += 3;
590 /* End of the pattern. */
593 md.endMatchPtr = stack.currentFrame->args.subjectPtr; /* Record where we ended */
594 md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and how many extracts were taken */
598 /* Assertion brackets. Check the alternative branches in turn - the
599 matching won't pass the KET for an assertion. If any one branch matches,
600 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
601 start of each branch to move the current point backwards, so the code at
602 this level is identical to the lookahead case. */
604 BEGIN_OPCODE(ASSERT):
606 uint16 bracketMess = get2ByteValue(stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE);
607 LOCALS(minBracket) = (bracketMess >> 8) & 0xff;
608 LOCALS(limitBracket) = bracketMess & 0xff;
609 JS_ASSERT(LOCALS(minBracket) <= LOCALS(limitBracket));
611 stack.currentFrame->locals.skipBytes = 3;
613 RECURSIVE_MATCH_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, NULL, false);
616 stack.currentFrame->locals.skipBytes = 1;
617 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
618 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
619 if (*stack.currentFrame->args.instructionPtr == OP_KET) {
620 for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i)
621 md.setOffsetPair(i, -1, -1);
625 /* Continue from after the assertion, updating the offsets high water
626 mark, since extracts may have been taken during the assertion. */
628 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
629 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
630 stack.currentFrame->args.offsetTop = md.endOffsetTop;
633 /* Negative assertion: all branches must fail to match */
635 BEGIN_OPCODE(ASSERT_NOT):
636 stack.currentFrame->locals.skipBytes = 3;
638 unsigned bracketMess = get2ByteValue(stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE);
639 LOCALS(minBracket) = (bracketMess >> 8) & 0xff;
640 LOCALS(limitBracket) = bracketMess & 0xff;
642 JS_ASSERT(LOCALS(minBracket) <= LOCALS(limitBracket));
644 RECURSIVE_MATCH_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, NULL, false);
647 stack.currentFrame->locals.skipBytes = 1;
648 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
649 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
651 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.skipBytes + LINK_SIZE;
654 /* An alternation is the end of a branch; scan along to find the end of the
655 bracketed group and go to there. */
658 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
661 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
662 that it may occur zero times. It may repeat infinitely, or not at all -
663 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
664 repeat limits are compiled as a number of copies, with the optional ones
665 preceded by BRAZERO or BRAMINZERO. */
667 BEGIN_OPCODE(BRAZERO): {
668 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
669 stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr + 1);
670 stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
671 minSatNextBracket.set();
672 RECURSIVE_MATCH_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain, true);
675 stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
676 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
677 stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
681 BEGIN_OPCODE(BRAMINZERO): {
682 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
683 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
684 RECURSIVE_MATCH_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain, false);
687 stack.currentFrame->args.instructionPtr++;
691 /* End of a group, repeated or non-repeating. If we are at the end of
692 an assertion "group", stop matching and return 1, but record the
693 current high water mark for use by positive assertions. Do this also
694 for the "once" (not-backup up) groups. */
697 BEGIN_OPCODE(KETRMIN):
698 BEGIN_OPCODE(KETRMAX):
699 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1);
700 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.bracketChain->bracketStart;
701 stack.currentFrame->locals.minSatisfied = stack.currentFrame->args.bracketChain->minSatisfied;
703 /* Back up the stack of bracket start pointers. */
705 stack.currentFrame->args.bracketChain = stack.currentFrame->args.bracketChain->previousBracket;
707 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) {
708 md.endOffsetTop = stack.currentFrame->args.offsetTop;
713 /* In all other cases except a conditional group we have to check the
714 group number back at the start and if necessary complete handling an
715 extraction by setting the offsets and bumping the high water mark. */
717 stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
719 /* For extended extraction brackets (large number), we have to fish out
720 the number from a dummy opcode at the start. */
722 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
723 stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 4 + LINK_SIZE);
724 stack.currentFrame->locals.offset = 2 * stack.currentFrame->locals.number;
726 DPRINTF(("end bracket %d\n", stack.currentFrame->locals.number));
728 /* Test for a numbered group. This includes groups called as a result
729 of recursion. Note that whole-pattern recursion is coded as a recurse
730 into group 0, so it won't be picked up here. Instead, we catch it when
731 the OP_END is reached. */
733 if (stack.currentFrame->locals.number > 0) {
734 if (stack.currentFrame->locals.offset >= md.offsetMax)
735 md.offsetOverflow = true;
737 int start = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
738 int end = stack.currentFrame->args.subjectPtr - md.startSubject;
739 if (start == end && stack.currentFrame->locals.minSatisfied) {
740 DPRINTF(("empty string while group already matched; bailing"));
743 DPRINTF(("saving; start: %d; end: %d\n", start, end));
744 JS_ASSERT(start <= end);
745 md.setOffsetPair(stack.currentFrame->locals.number, start, end);
746 if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset)
747 stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
751 /* For a non-repeating ket, just continue at this level. This also
752 happens for a repeating ket if no characters were matched in the group.
753 This is the forcible breaking of infinite loops as implemented in Perl
754 5.005. If there is an options reset, it will get obeyed in the normal
757 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
758 DPRINTF(("non-repeating ket or empty match\n"));
759 if (stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction && stack.currentFrame->locals.minSatisfied) {
760 DPRINTF(("empty string while group already matched; bailing"));
763 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
767 /* The repeating kets try the rest of the pattern or restart from the
768 preceding bracket, in the appropriate order. */
770 stack.currentFrame->extractBrackets(LOCALS(instructionPtrAtStartOfOnce));
771 JS_ASSERT_IF(LOCALS(number), LOCALS(minBracket) <= LOCALS(number) && LOCALS(number) < LOCALS(limitBracket));
772 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
773 stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
774 RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
778 stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
779 DPRINTF(("recursively matching lazy group\n"));
780 minSatNextBracket.set();
781 RECURSIVE_MATCH_NEW_GROUP(17, LOCALS(instructionPtrAtStartOfOnce), stack.currentFrame->args.bracketChain, true);
782 } else { /* OP_KETRMAX */
783 stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
784 stack.currentFrame->clobberOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
785 DPRINTF(("recursively matching greedy group\n"));
786 minSatNextBracket.set();
787 RECURSIVE_MATCH_NEW_GROUP(18, LOCALS(instructionPtrAtStartOfOnce), stack.currentFrame->args.bracketChain, true);
791 stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
792 RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
796 /* Start of subject. */
799 if (stack.currentFrame->args.subjectPtr != md.startSubject)
801 stack.currentFrame->args.instructionPtr++;
804 /* After internal newline if multiline. */
807 if (stack.currentFrame->args.subjectPtr != md.startSubject && !isNewline(stack.currentFrame->args.subjectPtr[-1]))
809 stack.currentFrame->args.instructionPtr++;
812 /* End of subject. */
815 if (stack.currentFrame->args.subjectPtr < md.endSubject)
817 stack.currentFrame->args.instructionPtr++;
820 /* Before internal newline if multiline. */
823 if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNewline(*stack.currentFrame->args.subjectPtr))
825 stack.currentFrame->args.instructionPtr++;
828 /* Word boundary assertions */
830 BEGIN_OPCODE(NOT_WORD_BOUNDARY):
831 BEGIN_OPCODE(WORD_BOUNDARY): {
832 bool currentCharIsWordChar = false;
833 bool previousCharIsWordChar = false;
835 if (stack.currentFrame->args.subjectPtr > md.startSubject)
836 previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
837 if (stack.currentFrame->args.subjectPtr < md.endSubject)
838 currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
840 /* Now see if the situation is what we want */
841 bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
842 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
847 /* Match a single character type; inline for speed */
849 BEGIN_OPCODE(NOT_NEWLINE):
850 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
852 if (isNewline(*stack.currentFrame->args.subjectPtr++))
854 stack.currentFrame->args.instructionPtr++;
857 BEGIN_OPCODE(NOT_DIGIT):
858 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
860 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
862 stack.currentFrame->args.instructionPtr++;
866 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
868 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
870 stack.currentFrame->args.instructionPtr++;
873 BEGIN_OPCODE(NOT_WHITESPACE):
874 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
876 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
878 stack.currentFrame->args.instructionPtr++;
881 BEGIN_OPCODE(WHITESPACE):
882 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
884 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
886 stack.currentFrame->args.instructionPtr++;
889 BEGIN_OPCODE(NOT_WORDCHAR):
890 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
892 if (isWordChar(*stack.currentFrame->args.subjectPtr++))
894 stack.currentFrame->args.instructionPtr++;
897 BEGIN_OPCODE(WORDCHAR):
898 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
900 if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
902 stack.currentFrame->args.instructionPtr++;
905 /* Match a back reference, possibly repeatedly. Look past the end of the
906 item to see if there is repeat information following. The code is similar
907 to that for character classes, but repeated for efficiency. Then obey
908 similar code to character type repeats - written out again for speed.
909 However, if the referenced string is the empty string, always treat
910 it as matched, any number of times (otherwise there could be infinite
914 stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1; /* Doubled ref number */
915 stack.currentFrame->args.instructionPtr += 3; /* Advance past item */
917 /* If the reference is unset, set the length to be longer than the amount
918 of subject left; this ensures that every attempt at a match fails. We
919 can't just fail here, because of the possibility of quantifiers with zero
922 if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
923 stack.currentFrame->locals.length = 0;
925 stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
927 /* Set up for repetition, or handle the non-repeated case */
929 switch (*stack.currentFrame->args.instructionPtr) {
936 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
941 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
942 min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
943 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
944 if (stack.currentFrame->locals.max == 0)
945 stack.currentFrame->locals.max = INT_MAX;
946 stack.currentFrame->args.instructionPtr += 5;
949 default: /* No repeat follows */
950 if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
952 stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
956 /* If the length of the reference is zero, just continue with the
959 if (stack.currentFrame->locals.length == 0)
962 /* First, ensure the minimum number of matches are present. */
964 for (int i = 1; i <= min; i++) {
965 if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
967 stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
970 /* If min = max, continue at the same level without recursion.
971 They are not both allowed to be zero. */
973 if (min == stack.currentFrame->locals.max)
976 /* If minimizing, keep trying and advancing the pointer */
979 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
980 RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
983 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
985 stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
987 /* Control never reaches here */
990 /* If maximizing, find the longest string and work backwards */
993 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
994 for (int i = min; i < stack.currentFrame->locals.max; i++) {
995 if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
997 stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
999 while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1000 RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1003 stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
1007 /* Control never reaches here */
1009 /* Match a bit-mapped character class, possibly repeatedly. This op code is
1010 used when all the characters in the class have values in the range 0-255,
1011 and either the matching is caseful, or the characters are in the range
1012 0-127 when UTF-8 processing is enabled. The only difference between
1013 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1016 First, look past the end of the item to see if there is repeat information
1017 following. Then obey similar code to character type repeats - written out
1020 BEGIN_OPCODE(NCLASS):
1021 BEGIN_OPCODE(CLASS):
1022 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1; /* Save for matching */
1023 stack.currentFrame->args.instructionPtr += 33; /* Advance past the item */
1025 switch (*stack.currentFrame->args.instructionPtr) {
1032 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
1037 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
1038 min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1039 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
1040 if (stack.currentFrame->locals.max == 0)
1041 stack.currentFrame->locals.max = INT_MAX;
1042 stack.currentFrame->args.instructionPtr += 5;
1045 default: /* No repeat follows */
1046 min = stack.currentFrame->locals.max = 1;
1050 /* First, ensure the minimum number of matches are present. */
1052 for (int i = 1; i <= min; i++) {
1053 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1055 int c = *stack.currentFrame->args.subjectPtr++;
1057 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1060 if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
1065 /* If max == min we can continue with the main loop without the
1068 if (min == stack.currentFrame->locals.max)
1071 /* If minimizing, keep testing the rest of the expression and advancing
1072 the pointer while it matches the class. */
1074 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1075 RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1078 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1080 int c = *stack.currentFrame->args.subjectPtr++;
1082 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1085 if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
1089 /* Control never reaches here */
1091 /* If maximizing, find the longest possible run, then work backwards. */
1093 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1095 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1096 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1098 int c = *stack.currentFrame->args.subjectPtr;
1100 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1103 if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
1106 ++stack.currentFrame->args.subjectPtr;
1109 RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1112 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1113 break; /* Stop if tried at original pos */
1118 /* Control never reaches here */
1120 /* Match an extended character class. */
1122 BEGIN_OPCODE(XCLASS):
1123 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE; /* Save for matching */
1124 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); /* Advance past the item */
1126 switch (*stack.currentFrame->args.instructionPtr) {
1133 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
1138 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
1139 min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1140 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
1141 if (stack.currentFrame->locals.max == 0)
1142 stack.currentFrame->locals.max = INT_MAX;
1143 stack.currentFrame->args.instructionPtr += 5;
1146 default: /* No repeat follows */
1147 min = stack.currentFrame->locals.max = 1;
1150 /* First, ensure the minimum number of matches are present. */
1152 for (int i = 1; i <= min; i++) {
1153 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1155 int c = *stack.currentFrame->args.subjectPtr++;
1156 if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
1160 /* If max == min we can continue with the main loop without the
1163 if (min == stack.currentFrame->locals.max)
1166 /* If minimizing, keep testing the rest of the expression and advancing
1167 the pointer while it matches the class. */
1170 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1171 RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1174 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1176 int c = *stack.currentFrame->args.subjectPtr++;
1177 if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
1180 /* Control never reaches here */
1183 /* If maximizing, find the longest possible run, then work backwards. */
1186 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1187 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1188 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1190 int c = *stack.currentFrame->args.subjectPtr;
1191 if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
1193 ++stack.currentFrame->args.subjectPtr;
1196 RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1199 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1200 break; /* Stop if tried at original pos */
1205 /* Control never reaches here */
1207 /* Match a single character, casefully */
1210 stack.currentFrame->locals.length = 1;
1211 stack.currentFrame->args.instructionPtr++;
1212 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1213 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1214 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1216 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
1220 /* Match a single character, caselessly */
1222 BEGIN_OPCODE(CHAR_IGNORING_CASE): {
1223 stack.currentFrame->locals.length = 1;
1224 stack.currentFrame->args.instructionPtr++;
1225 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1226 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1227 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1229 int dc = *stack.currentFrame->args.subjectPtr++;
1230 if (stack.currentFrame->locals.fc != dc && jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc)
1235 /* Match a single ASCII character. */
1237 BEGIN_OPCODE(ASCII_CHAR):
1238 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1240 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
1242 ++stack.currentFrame->args.subjectPtr;
1243 stack.currentFrame->args.instructionPtr += 2;
1246 /* Match one of two cases of an ASCII letter. */
1248 BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
1249 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1251 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
1253 ++stack.currentFrame->args.subjectPtr;
1254 stack.currentFrame->args.instructionPtr += 2;
1257 /* Match a single character repeatedly; different opcodes share code. */
1259 BEGIN_OPCODE(EXACT):
1260 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1262 stack.currentFrame->args.instructionPtr += 3;
1266 BEGIN_OPCODE(MINUPTO):
1268 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1269 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
1270 stack.currentFrame->args.instructionPtr += 3;
1274 BEGIN_OPCODE(MINSTAR):
1276 BEGIN_OPCODE(MINPLUS):
1277 BEGIN_OPCODE(QUERY):
1278 BEGIN_OPCODE(MINQUERY):
1279 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
1281 /* Common code for all repeated single-character matches. We can give
1282 up quickly if there are fewer than the minimum number of characters left in
1287 stack.currentFrame->locals.length = 1;
1288 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1289 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr)
1291 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
1293 if (stack.currentFrame->locals.fc <= 0xFFFF) {
1294 othercase = md.ignoreCase ? jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
1296 for (int i = 1; i <= min; i++) {
1297 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1299 ++stack.currentFrame->args.subjectPtr;
1302 if (min == stack.currentFrame->locals.max)
1306 stack.currentFrame->locals.repeatOthercase = othercase;
1307 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1308 RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1311 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1313 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase)
1315 ++stack.currentFrame->args.subjectPtr;
1317 /* Control never reaches here */
1319 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1320 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1321 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1323 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1325 ++stack.currentFrame->args.subjectPtr;
1327 while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1328 RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1331 --stack.currentFrame->args.subjectPtr;
1335 /* Control never reaches here */
1337 /* No case on surrogate pairs, so no need to bother with "othercase". */
1339 for (int i = 1; i <= min; i++) {
1340 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1342 stack.currentFrame->args.subjectPtr += 2;
1345 if (min == stack.currentFrame->locals.max)
1349 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1350 RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1353 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1355 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1357 stack.currentFrame->args.subjectPtr += 2;
1359 /* Control never reaches here */
1361 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1362 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1363 if (stack.currentFrame->args.subjectPtr > md.endSubject - 2)
1365 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
1367 stack.currentFrame->args.subjectPtr += 2;
1369 while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
1370 RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1373 stack.currentFrame->args.subjectPtr -= 2;
1377 /* Control never reaches here */
1379 /* Control never reaches here */
1381 /* Match a negated single one-byte character. */
1383 BEGIN_OPCODE(NOT): {
1384 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1386 int b = stack.currentFrame->args.instructionPtr[1];
1387 int c = *stack.currentFrame->args.subjectPtr++;
1388 stack.currentFrame->args.instructionPtr += 2;
1389 if (md.ignoreCase) {
1392 if (toLowerCase(b) == c)
1401 /* Match a negated single one-byte character repeatedly. This is almost a
1402 repeat of the code for a repeated single character, but I haven't found a
1403 nice way of commoning these up that doesn't require a test of the
1404 positive/negative option for each character match. Maybe that wouldn't add
1405 very much to the time taken, but character matching *is* what this is all
1408 BEGIN_OPCODE(NOTEXACT):
1409 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1411 stack.currentFrame->args.instructionPtr += 3;
1414 BEGIN_OPCODE(NOTUPTO):
1415 BEGIN_OPCODE(NOTMINUPTO):
1417 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1418 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
1419 stack.currentFrame->args.instructionPtr += 3;
1422 BEGIN_OPCODE(NOTSTAR):
1423 BEGIN_OPCODE(NOTMINSTAR):
1424 BEGIN_OPCODE(NOTPLUS):
1425 BEGIN_OPCODE(NOTMINPLUS):
1426 BEGIN_OPCODE(NOTQUERY):
1427 BEGIN_OPCODE(NOTMINQUERY):
1428 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
1430 /* Common code for all repeated single-byte matches. We can give up quickly
1431 if there are fewer than the minimum number of bytes left in the
1435 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1437 stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
1439 /* The code is duplicated for the caseless and caseful cases, for speed,
1440 since matching characters is likely to be quite common. First, ensure the
1441 minimum number of matches are present. If min = max, continue at the same
1442 level without recursing. Otherwise, if minimizing, keep trying the rest of
1443 the expression and advancing one matching character if failing, up to the
1444 maximum. Alternatively, if maximizing, find the maximum number of
1445 characters and work backwards. */
1447 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
1449 if (md.ignoreCase) {
1450 if (stack.currentFrame->locals.fc < 128)
1451 stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
1453 for (int i = 1; i <= min; i++) {
1454 int d = *stack.currentFrame->args.subjectPtr++;
1457 if (stack.currentFrame->locals.fc == d)
1461 if (min == stack.currentFrame->locals.max)
1465 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1466 RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1469 int d = *stack.currentFrame->args.subjectPtr++;
1472 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
1475 /* Control never reaches here */
1481 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1483 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1484 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1486 int d = *stack.currentFrame->args.subjectPtr;
1489 if (stack.currentFrame->locals.fc == d)
1491 ++stack.currentFrame->args.subjectPtr;
1494 RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1497 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1498 break; /* Stop if tried at original pos */
1503 /* Control never reaches here */
1506 /* Caseful comparisons */
1509 for (int i = 1; i <= min; i++) {
1510 int d = *stack.currentFrame->args.subjectPtr++;
1511 if (stack.currentFrame->locals.fc == d)
1515 if (min == stack.currentFrame->locals.max)
1519 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1520 RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1523 int d = *stack.currentFrame->args.subjectPtr++;
1524 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
1527 /* Control never reaches here */
1533 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1535 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1536 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1538 int d = *stack.currentFrame->args.subjectPtr;
1539 if (stack.currentFrame->locals.fc == d)
1541 ++stack.currentFrame->args.subjectPtr;
1544 RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1547 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1548 break; /* Stop if tried at original pos */
1554 /* Control never reaches here */
1556 /* Match a single character type repeatedly; several different opcodes
1557 share code. This is very similar to the code for single characters, but we
1558 repeat it in the interests of efficiency. */
1560 BEGIN_OPCODE(TYPEEXACT):
1561 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1563 stack.currentFrame->args.instructionPtr += 3;
1566 BEGIN_OPCODE(TYPEUPTO):
1567 BEGIN_OPCODE(TYPEMINUPTO):
1569 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1570 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
1571 stack.currentFrame->args.instructionPtr += 3;
1574 BEGIN_OPCODE(TYPESTAR):
1575 BEGIN_OPCODE(TYPEMINSTAR):
1576 BEGIN_OPCODE(TYPEPLUS):
1577 BEGIN_OPCODE(TYPEMINPLUS):
1578 BEGIN_OPCODE(TYPEQUERY):
1579 BEGIN_OPCODE(TYPEMINQUERY):
1580 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
1582 /* Common code for all repeated single character type matches. Note that
1583 in UTF-8 mode, '.' matches a character of any length, but for the other
1584 character types, the valid characters are all one-byte long. */
1587 stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++; /* Code for the character type */
1589 /* First, ensure the minimum number of matches are present. Use inline
1590 code for maximizing the speed, and do the type test once at the start
1591 (i.e. keep it out of the loop). Also we can test that there are at least
1592 the minimum number of characters before we start. */
1594 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1597 switch (stack.currentFrame->locals.ctype) {
1598 case OP_NOT_NEWLINE:
1599 for (int i = 1; i <= min; i++) {
1600 if (isNewline(*stack.currentFrame->args.subjectPtr))
1602 ++stack.currentFrame->args.subjectPtr;
1607 for (int i = 1; i <= min; i++) {
1608 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1610 ++stack.currentFrame->args.subjectPtr;
1615 for (int i = 1; i <= min; i++) {
1616 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
1618 ++stack.currentFrame->args.subjectPtr;
1622 case OP_NOT_WHITESPACE:
1623 for (int i = 1; i <= min; i++) {
1624 if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
1626 ++stack.currentFrame->args.subjectPtr;
1631 for (int i = 1; i <= min; i++) {
1632 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
1634 ++stack.currentFrame->args.subjectPtr;
1638 case OP_NOT_WORDCHAR:
1639 for (int i = 1; i <= min; i++) {
1640 if (isWordChar(*stack.currentFrame->args.subjectPtr))
1642 ++stack.currentFrame->args.subjectPtr;
1647 for (int i = 1; i <= min; i++) {
1648 if (!isWordChar(*stack.currentFrame->args.subjectPtr))
1650 ++stack.currentFrame->args.subjectPtr;
1655 JS_NOT_REACHED("Invalid character type.");
1656 return matchError(JSRegExpErrorInternal, stack);
1657 } /* End switch(stack.currentFrame->locals.ctype) */
1660 /* If min = max, continue at the same level without recursing */
1662 if (min == stack.currentFrame->locals.max)
1665 /* If minimizing, we have to test the rest of the pattern before each
1666 subsequent match. */
1669 for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
1670 RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1673 if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1676 int c = *stack.currentFrame->args.subjectPtr++;
1677 switch (stack.currentFrame->locals.ctype) {
1678 case OP_NOT_NEWLINE:
1684 if (isASCIIDigit(c))
1689 if (!isASCIIDigit(c))
1693 case OP_NOT_WHITESPACE:
1699 if (!isSpaceChar(c))
1703 case OP_NOT_WORDCHAR:
1714 JS_NOT_REACHED("Invalid character type.");
1715 return matchError(JSRegExpErrorInternal, stack);
1718 /* Control never reaches here */
1721 /* If maximizing it is worth using inline code for speed, doing the type
1722 test once at the start (i.e. keep it out of the loop). */
1725 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; /* Remember where we started */
1727 switch (stack.currentFrame->locals.ctype) {
1728 case OP_NOT_NEWLINE:
1729 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1730 if (stack.currentFrame->args.subjectPtr >= md.endSubject || isNewline(*stack.currentFrame->args.subjectPtr))
1732 stack.currentFrame->args.subjectPtr++;
1737 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1738 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1740 int c = *stack.currentFrame->args.subjectPtr;
1741 if (isASCIIDigit(c))
1743 ++stack.currentFrame->args.subjectPtr;
1748 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1749 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1751 int c = *stack.currentFrame->args.subjectPtr;
1752 if (!isASCIIDigit(c))
1754 ++stack.currentFrame->args.subjectPtr;
1758 case OP_NOT_WHITESPACE:
1759 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1760 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1762 int c = *stack.currentFrame->args.subjectPtr;
1765 ++stack.currentFrame->args.subjectPtr;
1770 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1771 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1773 int c = *stack.currentFrame->args.subjectPtr;
1774 if (!isSpaceChar(c))
1776 ++stack.currentFrame->args.subjectPtr;
1780 case OP_NOT_WORDCHAR:
1781 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1782 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1784 int c = *stack.currentFrame->args.subjectPtr;
1787 ++stack.currentFrame->args.subjectPtr;
1792 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1793 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1795 int c = *stack.currentFrame->args.subjectPtr;
1798 ++stack.currentFrame->args.subjectPtr;
1803 JS_NOT_REACHED("Invalid character type.");
1804 return matchError(JSRegExpErrorInternal, stack);
1807 /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
1810 RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1813 if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
1814 break; /* Stop if tried at original pos */
1817 /* Get here if we can't make it match with any permitted repetitions */
1821 /* Control never reaches here */
1823 BEGIN_OPCODE(CRMINPLUS):
1824 BEGIN_OPCODE(CRMINQUERY):
1825 BEGIN_OPCODE(CRMINRANGE):
1826 BEGIN_OPCODE(CRMINSTAR):
1827 BEGIN_OPCODE(CRPLUS):
1828 BEGIN_OPCODE(CRQUERY):
1829 BEGIN_OPCODE(CRRANGE):
1830 BEGIN_OPCODE(CRSTAR):
1831 JS_NOT_REACHED("Invalid opcode.");
1832 return matchError(JSRegExpErrorInternal, stack);
1834 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1839 /* Opening capturing bracket. If there is space in the offset vector, save
1840 the current subject position in the working slot at the top of the vector. We
1841 mustn't change the current values of the data slot, because they may be set
1842 from a previous iteration of this group, and be referred to by a reference
1845 If the bracket fails to match, we need to restore this value and also the
1846 values of the final offsets, in case they were set by a previous iteration of
1849 If there isn't enough space in the offset vector, treat this as if it were a
1850 non-capturing bracket. Don't worry about setting the flag for the error case
1851 here; that is handled in the code for KET. */
1853 JS_ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
1855 LOCALS(number) = *stack.currentFrame->args.instructionPtr - OP_BRA;
1856 stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr);
1857 DPRINTF(("opening capturing bracket %d\n", stack.currentFrame->locals.number));
1859 /* For extended extraction brackets (large number), we have to fish out the
1860 number from a dummy opcode at the start. */
1862 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
1863 stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 4 + LINK_SIZE);
1864 stack.currentFrame->locals.offset = 2 * stack.currentFrame->locals.number;
1866 JS_ASSERT_IF(LOCALS(number), LOCALS(minBracket) <= LOCALS(number) && LOCALS(number) < LOCALS(limitBracket));
1868 if (stack.currentFrame->locals.offset < md.offsetMax) {
1869 stack.currentFrame->locals.savedSubjectOffset = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
1870 DPRINTF(("setting subject offset for bracket to %d\n", stack.currentFrame->args.subjectPtr - md.startSubject));
1871 md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
1872 stack.currentFrame->locals.skipBytes = 3; /* For OP_BRAs. */
1874 /* We must compute this value at the top, before we move the instruction pointer. */
1875 stack.currentFrame->locals.minSatisfied = minSatNextBracket.readAndClear();
1877 /* We need to extract this into a variable so we can correctly pass it by value
1878 through RECURSIVE_MATCH_NEW_GROUP, which modifies currentFrame. */
1879 minSatisfied = stack.currentFrame->locals.minSatisfied;
1880 RECURSIVE_MATCH_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, stack.currentFrame->args.bracketChain, minSatisfied);
1883 stack.currentFrame->locals.skipBytes = 1; /* For OP_ALTs. */
1884 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
1885 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
1887 DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
1888 for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i)
1889 md.setOffsetPair(i, -1, -1);
1890 DPRINTF(("restoring subject offset for bracket to %d\n", stack.currentFrame->locals.savedSubjectOffset));
1891 md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.savedSubjectOffset;
1896 /* Insufficient room for saving captured contents */
1898 goto NON_CAPTURING_BRACKET;
1901 /* Do not stick any code in here without much thought; it is assumed
1902 that "continue" in the code above comes out to here to repeat the main
1905 } /* End of main loop */
1907 JS_NOT_REACHED("Loop does not fallthru.");
1909 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1912 switch (stack.currentFrame->returnLocation) {
1913 case 0: goto RETURN;
1914 case 1: goto RRETURN_1;
1915 case 2: goto RRETURN_2;
1916 case 6: goto RRETURN_6;
1917 case 7: goto RRETURN_7;
1918 case 14: goto RRETURN_14;
1919 case 15: goto RRETURN_15;
1920 case 16: goto RRETURN_16;
1921 case 17: goto RRETURN_17;
1922 case 18: goto RRETURN_18;
1923 case 19: goto RRETURN_19;
1924 case 20: goto RRETURN_20;
1925 case 21: goto RRETURN_21;
1926 case 22: goto RRETURN_22;
1927 case 24: goto RRETURN_24;
1928 case 26: goto RRETURN_26;
1929 case 27: goto RRETURN_27;
1930 case 28: goto RRETURN_28;
1931 case 29: goto RRETURN_29;
1932 case 30: goto RRETURN_30;
1933 case 31: goto RRETURN_31;
1934 case 38: goto RRETURN_38;
1935 case 40: goto RRETURN_40;
1936 case 42: goto RRETURN_42;
1937 case 44: goto RRETURN_44;
1938 case 48: goto RRETURN_48;
1939 case 52: goto RRETURN_52;
1942 JS_NOT_REACHED("Bad computed return location.");
1943 return matchError(JSRegExpErrorInternal, stack);
1952 /*************************************************
1953 * Execute a Regular Expression *
1954 *************************************************/
1956 /* This function applies a compiled re to a subject string and picks out
1957 portions of the string if it matches. Two elements in the vector are set for
1958 each substring: the offsets to the start and end of the substring.
1961 re points to the compiled expression
1962 extra_data points to extra data or is NULL
1963 subject points to the subject string
1964 length length of subject string (may contain binary zeros)
1965 start_offset where to start in the subject string
1967 offsets points to a vector of ints to be filled in with offsets
1968 offsetCount the number of elements in the vector
1970 Returns: > 0 => success; value is the number of elements filled in
1971 = 0 => success, but offsets is not big enough
1972 -1 => failed to match
1973 < -1 => some kind of unexpected problem
1976 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int firstByte, bool firstByteIsCaseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
1978 // If firstByte is set, try scanning to the first instance of that byte
1979 // no need to try and match against any earlier part of the subject string.
1980 if (firstByte >= 0) {
1981 UChar firstChar = firstByte;
1982 if (firstByteIsCaseless)
1983 while (subjectPtr < endSubject) {
1984 int c = *subjectPtr;
1987 if (toLowerCase(c) == firstChar)
1992 while (subjectPtr < endSubject && *subjectPtr != firstChar)
1995 } else if (useMultiLineFirstCharOptimization) {
1996 /* Or to just after \n for a multiline match if possible */
1997 // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
1998 if (subjectPtr > originalSubjectStart) {
1999 while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
2005 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int reqByte, int reqByte2, bool reqByteIsCaseless, bool hasFirstByte, const UChar*& reqBytePtr)
2007 /* If reqByte is set, we know that that character must appear in the subject
2008 for the match to succeed. If the first character is set, reqByte must be
2009 later in the subject; otherwise the test starts at the match point. This
2010 optimization can save a huge amount of backtracking in patterns with nested
2011 unlimited repeats that aren't going to match. Writing separate code for
2012 cased/caseless versions makes it go faster, as does using an autoincrement
2013 and backing off on a match.
2015 HOWEVER: when the subject string is very, very long, searching to its end can
2016 take a long time, and give bad performance on quite ordinary patterns. This
2017 showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2018 don't do this when the string is sufficiently long.
2021 if (reqByte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
2022 const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
2024 /* We don't need to repeat the search if we haven't yet reached the
2025 place we found it at last time. */
2027 if (p > reqBytePtr) {
2028 if (reqByteIsCaseless) {
2029 while (p < endSubject) {
2031 if (pp == reqByte || pp == reqByte2) {
2037 while (p < endSubject) {
2038 if (*p++ == reqByte) {
2045 /* If we can't find the required character, break the matching loop */
2047 if (p >= endSubject)
2050 /* If we have found the required character, save the point where we
2051 found it, so that we don't search again next time round the loop if
2052 the start hasn't passed this character yet. */
2060 int jsRegExpExecute(JSContext *cx, const JSRegExp* re,
2061 const UChar* subject, int length, int start_offset, int* offsets,
2065 JS_ASSERT(subject || !length);
2066 JS_ASSERT(offsetCount >= 0);
2067 JS_ASSERT(offsets || offsetCount == 0);
2069 MatchData matchBlock;
2070 matchBlock.startSubject = subject;
2071 matchBlock.endSubject = matchBlock.startSubject + length;
2072 const UChar* endSubject = matchBlock.endSubject;
2074 matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
2075 matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
2077 /* Use the vector supplied, rounding down its size to a multiple of 3. */
2078 int ocount = offsetCount - (offsetCount % 3);
2080 matchBlock.offsetVector = offsets;
2081 matchBlock.offsetEnd = ocount;
2082 matchBlock.offsetMax = (2*ocount)/3;
2083 matchBlock.offsetOverflow = false;
2085 /* Compute the minimum number of offsets that we need to reset each time. Doing
2086 this makes a huge difference to execution time when there aren't many brackets
2089 int resetCount = 2 + re->topBracket * 2;
2090 if (resetCount > offsetCount)
2091 resetCount = ocount;
2093 /* Reset the working variable associated with each extraction. These should
2094 never be used unless previously set, but they get saved and restored, and so we
2095 initialize them to avoid reading uninitialized locations. */
2097 if (matchBlock.offsetVector) {
2098 int* iptr = matchBlock.offsetVector + ocount;
2099 int* iend = iptr - resetCount/2 + 1;
2100 while (--iptr >= iend)
2104 /* Set up the first character to match, if available. The firstByte value is
2105 never set for an anchored regular expression, but the anchoring may be forced
2106 at run time, so we have to test for anchoring. The first char may be unset for
2107 an unanchored pattern, of course. If there's no first char and the pattern was
2108 studied, there may be a bitmap of possible first characters. */
2110 bool firstByteIsCaseless = false;
2112 if (re->options & UseFirstByteOptimizationOption) {
2113 firstByte = re->firstByte & 255;
2114 if ((firstByteIsCaseless = (re->firstByte & REQ_IGNORE_CASE)))
2115 firstByte = toLowerCase(firstByte);
2118 /* For anchored or unanchored matches, there may be a "last known required
2121 bool reqByteIsCaseless = false;
2124 if (re->options & UseRequiredByteOptimizationOption) {
2125 reqByte = re->reqByte & 255;
2126 reqByteIsCaseless = (re->reqByte & REQ_IGNORE_CASE);
2127 reqByte2 = flipCase(reqByte);
2130 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
2131 the loop runs just once. */
2133 const UChar* startMatch = subject + start_offset;
2134 const UChar* reqBytePtr = startMatch - 1;
2135 bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
2138 /* Reset the maximum number of extractions we might see. */
2139 if (matchBlock.offsetVector) {
2140 int* iptr = matchBlock.offsetVector;
2141 int* iend = iptr + resetCount;
2146 tryFirstByteOptimization(startMatch, endSubject, firstByte, firstByteIsCaseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
2147 if (tryRequiredByteOptimization(startMatch, endSubject, reqByte, reqByte2, reqByteIsCaseless, firstByte >= 0, reqBytePtr))
2150 /* When a match occurs, substrings will be set for all internal extractions;
2151 we just need to set up the whole thing as substring 0 before returning. If
2152 there were too many extractions, set the return code to zero. In the case
2153 where we had to get some local store to hold offsets for backreferences, copy
2154 those back references that we can. In this case there need not be overflow
2155 if certain parts of the pattern were not used. */
2157 /* The code starts after the JSRegExp block and the capture name table. */
2158 const unsigned char* start_code = (const unsigned char*)(re + 1);
2160 int returnCode = match(&cx->regExpPool, startMatch, start_code, 2, matchBlock);
2162 /* When the result is no match, advance the pointer to the next character
2164 if (returnCode == 0) {
2169 if (returnCode != 1) {
2170 JS_ASSERT(returnCode == JSRegExpErrorHitLimit);
2171 DPRINTF((">>>> error: returning %d\n", returnCode));
2175 /* We have a match! */
2177 returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
2179 if (offsetCount < 2)
2182 offsets[0] = startMatch - matchBlock.startSubject;
2183 offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
2186 JS_ASSERT(returnCode >= 0);
2187 DPRINTF((">>>> returning %d\n", returnCode));
2189 } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
2191 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2192 return JSRegExpErrorNoMatch;