glib/pcre/pcre_exec.c

   1 /*************************************************
   2 *      Perl-Compatible Regular Expressions       *
   3 *************************************************/
   4
   5 /* PCRE is a library of functions to support regular expressions whose syntax
   6 and semantics are as close as possible to those of the Perl 5 language.
   7
   8                        Written by Philip Hazel
   9            Copyright (c) 1997-2008 University of Cambridge
  10
  11 -----------------------------------------------------------------------------
  12 Redistribution and use in source and binary forms, with or without
  13 modification, are permitted provided that the following conditions are met:
  14
  15     * Redistributions of source code must retain the above copyright notice,
  16       this list of conditions and the following disclaimer.
  17
  18     * Redistributions in binary form must reproduce the above copyright
  19       notice, this list of conditions and the following disclaimer in the
  20       documentation and/or other materials provided with the distribution.
  21
  22     * Neither the name of the University of Cambridge nor the names of its
  23       contributors may be used to endorse or promote products derived from
  24       this software without specific prior written permission.
  25
  26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36 POSSIBILITY OF SUCH DAMAGE.
  37 -----------------------------------------------------------------------------
  38 */
  39
  40
  41 /* This module contains pcre_exec(), the externally visible function that does
  42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  43 possible. There are also some static supporting functions. */
  44
  45 #ifdef HAVE_CONFIG_H
  46 #include "config.h"
  47 #endif
  48
  49 #define NLBLOCK md             /* Block containing newline information */
  50 #define PSSTART start_subject  /* Field containing processed string start */
  51 #define PSEND   end_subject    /* Field containing processed string end */
  52 /* NOTE(review): set before the include, presumably for its macros - confirm */
  53 #include "pcre_internal.h"
  54
  55 /* Undefine cpp symbols that would clash with the min/max variables below */
  56
  57 #undef min
  58 #undef max
  59
  60 /* Flag bits for the match() function */
  61
  62 #define match_condassert     0x01  /* Called to check a condition assertion */
  63 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
  64
  65 /* Non-error returns from the match() function. Error returns are externally
  66 defined PCRE_ERROR_xxx codes, which are all negative. */
  67
  68 #define MATCH_MATCH        1       /* The pattern matched */
  69 #define MATCH_NOMATCH      0       /* The pattern failed to match */
  70
  71 /* Special internal returns from the match() function. Make them sufficiently
  72 negative to avoid the external error codes. */
  73 /* Each is given by the like-named opcode when what follows it fails to match. */
  74 #define MATCH_COMMIT       (-999)
  75 #define MATCH_PRUNE        (-998)
  76 #define MATCH_SKIP         (-997)  /* md->start_match_ptr is set as well */
  77 #define MATCH_THEN         (-996)
  78
  79 /* Maximum number of ints of offset to save on the stack for recursive calls.
  80 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
  81 because the offset vector is always a multiple of 3 long. */
  82
  83 #define REC_STACK_SAVE_MAX 30      /* Length of the stacksave[] array in match() */
  84
  85 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
  86 /* NOTE(review): presumably indexed by repeat kind (*, *?, +, +?, ?, ??) - confirm */
  87 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
  88 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
  89
  90
  91
  92 #ifdef DEBUG
  93 /*************************************************
  94 *        Debugging function to print chars       *
  95 *************************************************/
  96
  97 /* Show characters on stdout, giving the non-printing ones as \xhh escapes.
  98 For characters taken from the subject, stop at the end of the subject.
  99
 100 Arguments:   p, length   the characters to show, and how many of them
 101              is_subject  TRUE to cut length short at md->end_subject
 102              md          matching data block; read only if is_subject is TRUE
 103 Returns:     nothing */
 104
 105 static void
 106 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
 107 {
 108 int remaining = length;
 109 if (is_subject && md->end_subject - p < remaining)
 110   remaining = md->end_subject - p;
 111 for (; remaining > 0; remaining--, p++)
 112   {
 113   unsigned int ch = *p;
 114   if (isprint(ch)) printf("%c", ch); else printf("\\x%02x", ch);
 115   }
 116 }
 117 #endif
 118
 119
 120
 121 /*************************************************
 122 *          Match a back-reference                *
 123 *************************************************/
 124
 125 /* Compare the text captured by a group with the subject at the current
 126 position. For a back reference that hasn't been set, the caller passes a
 127 length greater than the number of characters left in the subject, so the
 128 length test below makes the match fail.
 129
 130 Arguments:   offset   index into the offset vector
 131              eptr     points into the subject
 132              length   length to be matched
 133              md       points to match data block
 134              ims      the ims flags (only PCRE_CASELESS is looked at)
 135 Returns:     TRUE if matched, FALSE otherwise */
 136
 137 static BOOL
 138 match_ref(int offset, register USPTR eptr, int length, match_data *md,
 139   unsigned long int ims)
 140 {
 141 USPTR ref = md->start_subject + md->offset_vector[offset];
 142
 143 #ifdef DEBUG
 144 if (eptr < md->end_subject)
 145   {
 146   printf("matching subject ");
 147   pchars(eptr, length, TRUE, md);
 148   }
 149 else
 150   printf("matching subject <null>");
 151 printf(" against backref ");
 152 pchars(ref, length, FALSE, md);
 153 printf("\n");
 154 #endif
 155
 156 /* The reference cannot match when too few subject characters remain */
 157 if (md->end_subject - eptr < length) return FALSE;
 158
 159 /* Caseful and caseless comparison each get their own loop, for speed */
 160 if ((ims & PCRE_CASELESS) == 0)
 161   {
 162   for (; length > 0; length--)
 163     if (*ref++ != *eptr++) return FALSE;
 164   }
 165 else
 166   {
 167   for (; length > 0; length--)
 168     if (md->lcc[*ref++] != md->lcc[*eptr++]) return FALSE;
 169   }
 170
 171 return TRUE;
 172 }
 173
 174
 175
 176 /***************************************************************************
 177 ****************************************************************************
 178                    RECURSION IN THE match() FUNCTION
 179
 180 The match() function is highly recursive, though not every recursive call
 181 increases the recursive depth. Nevertheless, some regular expressions can cause
 182 it to recurse to a great depth. I was writing for Unix, so I just let it call
 183 itself recursively. This uses the stack for saving everything that has to be
 184 saved for a recursive call. On Unix, the stack can be large, and this works
 185 fine.
 186
 187 It turns out that on some non-Unix-like systems there are problems with
 188 programs that use a lot of stack. (This despite the fact that every last chip
 189 has oodles of memory these days, and techniques for extending the stack have
 190 been known for decades.) So....
 191
 192 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 193 calls by keeping local variables that need to be preserved in blocks of memory
 194 obtained from malloc() instead of on the stack. Macros are used to
 195 achieve this so that the actual code doesn't look very different to what it
 196 always used to.
 197
 198 The original heap-recursive code used longjmp(). However, it seems that this
 199 can be very slow on some operating systems. Following a suggestion from Stan
 200 Switzer, the use of longjmp() has been abolished, at the cost of having to
 201 provide a unique number for each call to RMATCH. There is no way of generating
 202 a sequence of numbers at compile time in C. I have given them names, to make
 203 them stand out more clearly.
 204
 205 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
 206 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
 207 tests. Furthermore, not using longjmp() means that local dynamic variables
 208 don't have indeterminate values; this has meant that the frame size can be
 209 reduced because the result can be "passed back" by straight setting of the
 210 variable instead of being passed in the frame.
 211 ****************************************************************************
 212 ***************************************************************************/
 213
 214 /* Numbers for RMATCH calls, one for each place RMATCH is used. When this list
 215 is changed, the code at HEAP_RETURN below must be updated in sync.  */
 216 /* The heap RMATCH saves the number in Xwhere and pastes it into a label L_RMn. */
 217 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
 218        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
 219        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
 220        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
 221        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
 222        RM51,  RM52, RM53, RM54 };
 223
 224 /* These versions of the macros use the stack, as normal. There are debugging
 225 versions and production versions. Note that the "rw" argument of RMATCH isn't
 226 actually used in this definition. */
 227 /* Both forms leave the result of the recursive match() call in rrc. */
 228 #ifndef NO_RECURSE
 229 #define REGISTER register
 230 /* mstart and rdepth come from the scope of the caller, not from macro arguments. */
 231 #ifdef DEBUG
 232 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 233   { \
 234   printf("match() called in line %d\n", __LINE__); \
 235   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
 236   printf("to line %d\n", __LINE__); \
 237   }
 238 #define RRETURN(ra) \
 239   { \
 240   printf("match() returned %d from line %d ", ra, __LINE__); \
 241   return ra; \
 242   }
 243 #else
 244 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 245   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
 246 #define RRETURN(ra) return ra
 247 #endif
 248
 249 #else
 250 #define REGISTER
 251
 252 /* These versions of the macros manage a private stack on the heap. Note that
 253 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
 254 argument of match(), which never changes. If no new frame can be obtained,
 255 RMATCH makes the current incarnation give PCRE_ERROR_NOMEMORY via RRETURN. */
 256
 257 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
 258   {\
 259   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
 260   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
 261   frame->Xwhere = rw; \
 262   newframe->Xeptr = ra;\
 263   newframe->Xecode = rb;\
 264   newframe->Xmstart = mstart;\
 265   newframe->Xoffset_top = rc;\
 266   newframe->Xims = re;\
 267   newframe->Xeptrb = rf;\
 268   newframe->Xflags = rg;\
 269   newframe->Xrdepth = frame->Xrdepth + 1;\
 270   newframe->Xprevframe = frame;\
 271   frame = newframe;\
 272   DPRINTF(("restarting from line %d\n", __LINE__));\
 273   goto HEAP_RECURSE;\
 274   L_##rw:\
 275   DPRINTF(("jumped back to line %d\n", __LINE__));\
 276   }
 277 /* Free the current frame; then resume the parent frame, or really return. */
 278 #define RRETURN(ra)\
 279   {\
 280   heapframe *newframe = frame;\
 281   frame = newframe->Xprevframe;\
 282   (pcre_stack_free)(newframe);\
 283   if (frame != NULL)\
 284     {\
 285     rrc = ra;\
 286     goto HEAP_RETURN;\
 287     }\
 288   return ra;\
 289   }
 290
 291
 292 /* Structure for remembering the local variables in a private frame */
 293
 294 typedef struct heapframe {
 295   struct heapframe *Xprevframe;
 296
 297   /* Function arguments that may change */
 298
 299   const uschar *Xeptr;
 300   const uschar *Xecode;
 301   const uschar *Xmstart;
 302   int Xoffset_top;
 303   long int Xims;
 304   eptrblock *Xeptrb;
 305   int Xflags;
 306   unsigned int Xrdepth;
 307
 308   /* Function local variables */
 309
 310   const uschar *Xcallpat;
 311   const uschar *Xcharptr;
 312   const uschar *Xdata;
 313   const uschar *Xnext;
 314   const uschar *Xpp;
 315   const uschar *Xprev;
 316   const uschar *Xsaved_eptr;
 317
 318   recursion_info Xnew_recursive;
 319
 320   BOOL Xcur_is_word;
 321   BOOL Xcondition;
 322   BOOL Xprev_is_word;
 323
 324   unsigned long int Xoriginal_ims;
 325
 326 #ifdef SUPPORT_UCP
 327   int Xprop_type;
 328   int Xprop_value;
 329   int Xprop_fail_result;
 330   int Xprop_category;
 331   int Xprop_chartype;
 332   int Xprop_script;
 333   int Xoclength;
 334   uschar Xocchars[8];
 335 #endif
 336
 337   int Xctype;
 338   unsigned int Xfc;
 339   int Xfi;
 340   int Xlength;
 341   int Xmax;
 342   int Xmin;
 343   int Xnumber;
 344   int Xoffset;
 345   int Xop;
 346   int Xsave_capture_last;
 347   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
 348   int Xstacksave[REC_STACK_SAVE_MAX];
 349
 350   eptrblock Xnewptrb;
 351
 352   /* Where to jump back to */
 353
 354   int Xwhere;
 355
 356 } heapframe;
 357
 358 #endif
 359
 360
 361 /***************************************************************************
 362 ***************************************************************************/
 363
 364
 365
 366 /*************************************************
 367 *         Match from current position            *
 368 *************************************************/
 369
 370 /* This function is called recursively in many circumstances. Whenever it
 371 returns a negative (error) response, the outer incarnation must also return the
 372 same response.
 373
 374 Performance note: It might be tempting to extract commonly used fields from the
 375 md structure (e.g. utf8, end_subject) into individual variables to improve
 376 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 377 made performance worse.
 378
 379 Arguments:
 380    eptr        pointer to current character in subject
 381    ecode       pointer to current position in compiled code
 382    mstart      pointer to the current match start position (can be modified
 383                  by encountering \K)
 384    offset_top  current top pointer
 385    md          pointer to "static" info for the match
 386    ims         current /i, /m, and /s options
 387    eptrb       pointer to chain of blocks containing eptr at start of
 388                  brackets - for testing for empty matches
 389    flags       can contain
 390                  match_condassert - this is an assertion condition
 391                  match_cbegroup - this is the start of an unlimited repeat
 392                    group that can match an empty string
 393    rdepth      the recursion depth
 394
 395 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 396                MATCH_NOMATCH if failed to match  )
 397                a negative PCRE_ERROR_xxx value if aborted by an error condition
 398                  (e.g. stopped by repeated call or recursion limit)
 399 */
 400
 401 static int
 402 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
 403   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
 404   int flags, unsigned int rdepth)
 405 {
 406 /* These variables do not need to be preserved over recursion in this function,
 407 so they can be ordinary variables in all cases. Mark some of them with
 408 "register" because they are used a lot in loops. */
 409
 410 register int  rrc;         /* Returns from recursive calls */
 411 register int  i;           /* Used for loops not involving calls to RMATCH() */
 412 register unsigned int c;   /* Character values not kept over RMATCH() calls */
 413 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
 414
 415 BOOL minimize, possessive; /* Quantifier options */
 416
 417 /* When recursion is not being used, all "local" variables that have to be
 418 preserved over calls to RMATCH() are part of a "frame" which is obtained from
 419 heap storage. Set up the top-level frame here; others are obtained from the
 420 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
 421
 422 #ifdef NO_RECURSE
 423 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
 424 frame->Xprevframe = NULL;            /* Marks the top level */
 425
 426 /* Copy in the original argument variables */
 427
 428 frame->Xeptr = eptr;
 429 frame->Xecode = ecode;
 430 frame->Xmstart = mstart;
 431 frame->Xoffset_top = offset_top;
 432 frame->Xims = ims;
 433 frame->Xeptrb = eptrb;
 434 frame->Xflags = flags;
 435 frame->Xrdepth = rdepth;
 436
 437 /* This is where control jumps back to to effect "recursion" */
 438
 439 HEAP_RECURSE:
 440
 441 /* Macros make the argument variables come from the current frame */
 442
 443 #define eptr               frame->Xeptr
 444 #define ecode              frame->Xecode
 445 #define mstart             frame->Xmstart
 446 #define offset_top         frame->Xoffset_top
 447 #define ims                frame->Xims
 448 #define eptrb              frame->Xeptrb
 449 #define flags              frame->Xflags
 450 #define rdepth             frame->Xrdepth
 451
 452 /* Ditto for the local variables */
 453
 454 #ifdef SUPPORT_UTF8
 455 #define charptr            frame->Xcharptr
 456 #endif
 457 #define callpat            frame->Xcallpat
 458 #define data               frame->Xdata
 459 #define next               frame->Xnext
 460 #define pp                 frame->Xpp
 461 #define prev               frame->Xprev
 462 #define saved_eptr         frame->Xsaved_eptr
 463
 464 #define new_recursive      frame->Xnew_recursive
 465
 466 #define cur_is_word        frame->Xcur_is_word
 467 #define condition          frame->Xcondition
 468 #define prev_is_word       frame->Xprev_is_word
 469
 470 #define original_ims       frame->Xoriginal_ims
 471
 472 #ifdef SUPPORT_UCP
 473 #define prop_type          frame->Xprop_type
 474 #define prop_value         frame->Xprop_value
 475 #define prop_fail_result   frame->Xprop_fail_result
 476 #define prop_category      frame->Xprop_category
 477 #define prop_chartype      frame->Xprop_chartype
 478 #define prop_script        frame->Xprop_script
 479 #define oclength           frame->Xoclength
 480 #define occhars            frame->Xocchars
 481 #endif
 482
 483 #define ctype              frame->Xctype
 484 #define fc                 frame->Xfc
 485 #define fi                 frame->Xfi
 486 #define length             frame->Xlength
 487 #define max                frame->Xmax
 488 #define min                frame->Xmin
 489 #define number             frame->Xnumber
 490 #define offset             frame->Xoffset
 491 #define op                 frame->Xop
 492 #define save_capture_last  frame->Xsave_capture_last
 493 #define save_offset1       frame->Xsave_offset1
 494 #define save_offset2       frame->Xsave_offset2
 495 #define save_offset3       frame->Xsave_offset3
 496 #define stacksave          frame->Xstacksave
 497
 498 #define newptrb            frame->Xnewptrb
 499
 500 /* When recursion is being used, local variables are allocated on the stack and
 501 get preserved during recursion in the normal way. In this environment, fi and
 502 i, and fc and c, can be the same variables. */
 503
 504 #else         /* NO_RECURSE not defined */
 505 #define fi i
 506 #define fc c
 507
 508
 509 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
 510 const uschar *charptr;             /* in small blocks of the code. My normal */
 511 #endif                             /* style of coding would have declared    */
 512 const uschar *callpat;             /* them within each of those blocks.      */
 513 const uschar *data;                /* However, in order to accommodate the   */
 514 const uschar *next;                /* version of this code that uses an      */
 515 USPTR         pp;                  /* external "stack" implemented on the    */
 516 const uschar *prev;                /* heap, it is easier to declare them all */
 517 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
 518                                    /* out in a block. The only declarations  */
 519 recursion_info new_recursive;      /* within blocks below are for variables  */
 520                                    /* that do not have to be preserved over  */
 521 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
 522 BOOL condition;
 523 BOOL prev_is_word;
 524
 525 unsigned long int original_ims;
 526
 527 #ifdef SUPPORT_UCP
 528 int prop_type;
 529 int prop_value;
 530 int prop_fail_result;
 531 int prop_category;
 532 int prop_chartype;
 533 int prop_script;
 534 int oclength;
 535 uschar occhars[8];
 536 #endif
 537
 538 int ctype;
 539 int length;
 540 int max;
 541 int min;
 542 int number;
 543 int offset;
 544 int op;
 545 int save_capture_last;
 546 int save_offset1, save_offset2, save_offset3;
 547 int stacksave[REC_STACK_SAVE_MAX];
 548
 549 eptrblock newptrb;
 550 #endif     /* NO_RECURSE */
 551
 552 /* These statements are here to stop the compiler complaining about uninitialized
 553 variables. */
 554
 555 #ifdef SUPPORT_UCP
 556 prop_value = 0;
 557 prop_fail_result = 0;
 558 #endif
 559
 560
 561 /* This label is used for tail recursion, which is used in a few cases even
 562 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 563 used. Thanks to Ian Taylor for noticing this possibility and sending the
 564 original patch. */
 565
 566 TAIL_RECURSE:
 567
 568 /* OK, now we can get on with the real code of the function. Recursive calls
 569 are specified by the macro RMATCH and RRETURN is used to return. When
 570 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 571 and a "return", respectively (possibly with some debugging if DEBUG is
 572 defined). However, RMATCH isn't like a function call because it's quite a
 573 complicated macro. It has to be used in one particular way. This shouldn't,
 574 however, impact performance when true recursion is being used. */
 575
 576 #ifdef SUPPORT_UTF8
 577 utf8 = md->utf8;       /* Local copy of the flag */
 578 #else
 579 utf8 = FALSE;
 580 #endif
 581
 582 /* First check that we haven't called match() too many times, or that we
 583 haven't exceeded the recursive call limit. */
 584
 585 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 586 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 587
 588 original_ims = ims;    /* Save for resetting on ')' */
 589
 590 /* At the start of a group with an unlimited repeat that may match an empty
 591 string, the match_cbegroup flag is set. When this is the case, add the current
 592 subject pointer to the chain of such remembered pointers, to be checked when we
 593 hit the closing ket, in order to break infinite loops that match no characters.
 594 When match() is called in other circumstances, don't add to the chain. The
 595 match_cbegroup flag must NOT be used with tail recursion, because the memory
 596 block that is used is on the stack, so a new one may be required for each
 597 match(). */
 598
 599 if ((flags & match_cbegroup) != 0)
 600   {
 601   newptrb.epb_saved_eptr = eptr;
 602   newptrb.epb_prev = eptrb;
 603   eptrb = &newptrb;
 604   }
 605
 606 /* Now start processing the opcodes. */
 607
 608 for (;;)
 609   {
 610   minimize = possessive = FALSE;
 611   op = *ecode;
 612
 613   /* For partial matching, remember if we ever hit the end of the subject after
 614   matching at least one subject character. */
 615
 616   if (md->partial &&
 617       eptr >= md->end_subject &&
 618       eptr > mstart)
 619     md->hitend = TRUE;
 620
 621   switch(op)
 622     {
 623     case OP_FAIL:
 624     RRETURN(MATCH_NOMATCH);
 625
 626     case OP_PRUNE:
 627     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 628       ims, eptrb, flags, RM51);
 629     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 630     RRETURN(MATCH_PRUNE);
 631
 632     case OP_COMMIT:
 633     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 634       ims, eptrb, flags, RM52);
 635     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 636     RRETURN(MATCH_COMMIT);
 637
 638     case OP_SKIP:
 639     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 640       ims, eptrb, flags, RM53);
 641     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 642     md->start_match_ptr = eptr;   /* Pass back current position */
 643     RRETURN(MATCH_SKIP);
 644
 645     case OP_THEN:
 646     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 647       ims, eptrb, flags, RM54);
 648     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 649     RRETURN(MATCH_THEN);
 650
 651     /* Handle a capturing bracket. If there is space in the offset vector, save
 652     the current subject position in the working slot at the top of the vector.
 653     We mustn't change the current values of the data slot, because they may be
 654     set from a previous iteration of this group, and be referred to by a
 655     reference inside the group.
 656
 657     If the bracket fails to match, we need to restore this value and also the
 658     values of the final offsets, in case they were set by a previous iteration
 659     of the same bracket.
 660
 661     If there isn't enough space in the offset vector, treat this as if it were
 662     a non-capturing bracket. Don't worry about setting the flag for the error
 663     case here; that is handled in the code for KET. */
 664
 665     case OP_CBRA:
 666     case OP_SCBRA:
 667     number = GET2(ecode, 1+LINK_SIZE);
 668     offset = number << 1;
 669
 670 #ifdef DEBUG
 671     printf("start bracket %d\n", number);
 672     printf("subject=");
 673     pchars(eptr, 16, TRUE, md);
 674     printf("\n");
 675 #endif
 676
 677     if (offset < md->offset_max)
 678       {
 679       save_offset1 = md->offset_vector[offset];
 680       save_offset2 = md->offset_vector[offset+1];
 681       save_offset3 = md->offset_vector[md->offset_end - number];
 682       save_capture_last = md->capture_last;
 683
 684       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 685       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
 686
 687       flags = (op == OP_SCBRA)? match_cbegroup : 0;
 688       do
 689         {
 690         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 691           ims, eptrb, flags, RM1);
 692         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 693         md->capture_last = save_capture_last;
 694         ecode += GET(ecode, 1);
 695         }
 696       while (*ecode == OP_ALT);
 697
 698       DPRINTF(("bracket %d failed\n", number));
 699
 700       md->offset_vector[offset] = save_offset1;
 701       md->offset_vector[offset+1] = save_offset2;
 702       md->offset_vector[md->offset_end - number] = save_offset3;
 703
 704       RRETURN(MATCH_NOMATCH);
 705       }
 706
 707     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
 708     as a non-capturing bracket. */
 709
 710     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 711     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 712
 713     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
 714
 715     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 716     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 717
 718     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
 719     final alternative within the brackets, we would return the result of a
 720     recursive call to match() whatever happened. We can reduce stack usage by
 721     turning this into a tail recursion, except in the case when match_cbegroup
 722     is set.*/
 723
 724     case OP_BRA:
 725     case OP_SBRA:
 726     DPRINTF(("start non-capturing bracket\n"));
 727     flags = (op >= OP_SBRA)? match_cbegroup : 0;
 728     for (;;)
 729       {
 730       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
 731         {
 732         if (flags == 0)    /* Not a possibly empty group */
 733           {
 734           ecode += _pcre_OP_lengths[*ecode];
 735           DPRINTF(("bracket 0 tail recursion\n"));
 736           goto TAIL_RECURSE;
 737           }
 738
 739         /* Possibly empty group; can't use tail recursion. */
 740
 741         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 742           eptrb, flags, RM48);
 743         RRETURN(rrc);
 744         }
 745
 746       /* For non-final alternatives, continue the loop for a NOMATCH result;
 747       otherwise return. */
 748
 749       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 750         eptrb, flags, RM2);
 751       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 752       ecode += GET(ecode, 1);
 753       }
 754     /* Control never reaches here. */
 755
 756     /* Conditional group: compilation checked that there are no more than
 757     two branches. If the condition is false, skipping the first branch takes us
 758     past the end if there is only one branch, but that's OK because that is
 759     exactly what going to the ket would do. As there is only one branch to be
 760     obeyed, we can use tail recursion to avoid using another stack frame. */
 761
 762     case OP_COND:
 763     case OP_SCOND:
 764     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
 765       {
 766       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
 767       condition = md->recursive != NULL &&
 768         (offset == RREF_ANY || offset == md->recursive->group_num);
 769       ecode += condition? 3 : GET(ecode, 1);
 770       }
 771
 772     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
 773       {
 774       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
 775       condition = offset < offset_top && md->offset_vector[offset] >= 0;
 776       ecode += condition? 3 : GET(ecode, 1);
 777       }
 778
 779     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
 780       {
 781       condition = FALSE;
 782       ecode += GET(ecode, 1);
 783       }
 784
 785     /* The condition is an assertion. Call match() to evaluate it - setting
 786     the final argument match_condassert causes it to stop at the end of an
 787     assertion. */
 788
 789     else
 790       {
 791       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
 792           match_condassert, RM3);
 793       if (rrc == MATCH_MATCH)
 794         {
 795         condition = TRUE;
 796         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
 797         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
 798         }
 799       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
 800         {
 801         RRETURN(rrc);         /* Need braces because of following else */
 802         }
 803       else
 804         {
 805         condition = FALSE;
 806         ecode += GET(ecode, 1);
 807         }
 808       }
 809
 810     /* We are now at the branch that is to be obeyed. As there is only one,
 811     we can use tail recursion to avoid using another stack frame, except when
 812     match_cbegroup is required for an unlimited repeat of a possibly empty
 813     group. If the second alternative doesn't exist, we can just plough on. */
 814
 815     if (condition || *ecode == OP_ALT)
 816       {
 817       ecode += 1 + LINK_SIZE;
 818       if (op == OP_SCOND)        /* Possibly empty group */
 819         {
 820         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
 821         RRETURN(rrc);
 822         }
 823       else                       /* Group must match something */
 824         {
 825         flags = 0;
 826         goto TAIL_RECURSE;
 827         }
 828       }
 829     else                         /* Condition false & no 2nd alternative */
 830       {
 831       ecode += 1 + LINK_SIZE;
 832       }
 833     break;
 834
 835
 836     /* End of the pattern, either real or forced. If we are in a top-level
 837     recursion, we should restore the offsets appropriately and continue from
 838     after the call. */
 839
 840     case OP_ACCEPT:
 841     case OP_END:
 842     if (md->recursive != NULL && md->recursive->group_num == 0)
 843       {
 844       recursion_info *rec = md->recursive;
 845       DPRINTF(("End of pattern in a (?0) recursion\n"));
 846       md->recursive = rec->prevrec;
 847       memmove(md->offset_vector, rec->offset_save,
 848         rec->saved_max * sizeof(int));
 849       mstart = rec->save_start;
 850       ims = original_ims;
 851       ecode = rec->after_call;
 852       break;
 853       }
 854
 855     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
 856     string - backtracking will then try other alternatives, if any. */
 857
 858     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
 859     md->end_match_ptr = eptr;           /* Record where we ended */
 860     md->end_offset_top = offset_top;    /* and how many extracts were taken */
 861     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
 862     RRETURN(MATCH_MATCH);
 863
 864     /* Change option settings */
 865
 866     case OP_OPT:
 867     ims = ecode[1];
 868     ecode += 2;
 869     DPRINTF(("ims set to %02lx\n", ims));
 870     break;
 871
 872     /* Assertion brackets. Check the alternative branches in turn - the
 873     matching won't pass the KET for an assertion. If any one branch matches,
 874     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
 875     start of each branch to move the current point backwards, so the code at
 876     this level is identical to the lookahead case. */
 877
 878     case OP_ASSERT:
 879     case OP_ASSERTBACK:
 880     do
 881       {
 882       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
 883         RM4);
 884       if (rrc == MATCH_MATCH) break;
 885       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 886       ecode += GET(ecode, 1);
 887       }
 888     while (*ecode == OP_ALT);
 889     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
 890
 891     /* If checking an assertion for a condition, return MATCH_MATCH. */
 892
 893     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 894
 895     /* Continue from after the assertion, updating the offsets high water
 896     mark, since extracts may have been taken during the assertion. */
 897
 898     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
 899     ecode += 1 + LINK_SIZE;
 900     offset_top = md->end_offset_top;
 901     continue;
 902
 903     /* Negative assertion: all branches must fail to match */
 904
 905     case OP_ASSERT_NOT:
 906     case OP_ASSERTBACK_NOT:
 907     do
 908       {
 909       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
 910         RM5);
 911       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
 912       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 913       ecode += GET(ecode,1);
 914       }
 915     while (*ecode == OP_ALT);
 916
 917     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 918
 919     ecode += 1 + LINK_SIZE;
 920     continue;
 921
 922     /* Move the subject pointer back. This occurs only at the start of
 923     each branch of a lookbehind assertion. If we are too close to the start to
 924     move back, this match function fails. When working with UTF-8 we move
 925     back a number of characters, not bytes. */
 926
 927     case OP_REVERSE:
 928 #ifdef SUPPORT_UTF8
 929     if (utf8)
 930       {
 931       i = GET(ecode, 1);
 932       while (i-- > 0)
 933         {
 934         eptr--;
 935         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 936         BACKCHAR(eptr);
 937         }
 938       }
 939     else
 940 #endif
 941
 942     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
 943
 944       {
 945       eptr -= GET(ecode, 1);
 946       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 947       }
 948
 949     /* Skip to next op code */
 950
 951     ecode += 1 + LINK_SIZE;
 952     break;
 953
 954     /* The callout item calls an external function, if one is provided, passing
 955     details of the match so far. This is mainly for debugging, though the
 956     function is able to force a failure. */
 957
 958     case OP_CALLOUT:
 959     if (pcre_callout != NULL)
 960       {
 961       pcre_callout_block cb;
 962       cb.version          = 1;   /* Version 1 of the callout block */
 963       cb.callout_number   = ecode[1];
 964       cb.offset_vector    = md->offset_vector;
 965       cb.subject          = (PCRE_SPTR)md->start_subject;
 966       cb.subject_length   = md->end_subject - md->start_subject;
 967       cb.start_match      = mstart - md->start_subject;
 968       cb.current_position = eptr - md->start_subject;
 969       cb.pattern_position = GET(ecode, 2);
 970       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
 971       cb.capture_top      = offset_top/2;
 972       cb.capture_last     = md->capture_last;
 973       cb.callout_data     = md->callout_data;
 974       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
 975       if (rrc < 0) RRETURN(rrc);
 976       }
 977     ecode += 2 + 2*LINK_SIZE;
 978     break;
 979
 980     /* Recursion either matches the current regex, or some subexpression. The
 981     offset data is the offset to the starting bracket from the start of the
 982     whole pattern. (This is so that it works from duplicated subpatterns.)
 983
 984     If there are any capturing brackets started but not finished, we have to
 985     save their starting points and reinstate them after the recursion. However,
 986     we don't know how many such there are (offset_top records the completed
 987     total) so we just have to save all the potential data. There may be up to
 988     65535 such values, which is too large to put on the stack, but using malloc
 989     for small numbers seems expensive. As a compromise, the stack is used when
 990     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
 991     is used. If the malloc fails, PCRE_ERROR_NOMEMORY is returned. A malloc'd
 992     block is freed when the recursion matches or runs out of alternatives;
 993     NOTE(review): it appears not to be freed on the error-return path below.
 994
 995     There are also other values that have to be saved. We use a chained
 996     sequence of blocks that actually live on the stack. Thanks to Robin Houston
 997     for the original version of this logic. */
 998
 999     case OP_RECURSE:
1000       {
1001       callpat = md->start_code + GET(ecode, 1);
1002       new_recursive.group_num = (callpat == md->start_code)? 0 :
1003         GET2(callpat, 1 + LINK_SIZE);
1004
1005       /* Add to "recursing stack" */
1006
1007       new_recursive.prevrec = md->recursive;
1008       md->recursive = &new_recursive;
1009
1010       /* Find where to continue from afterwards */
1011
1012       ecode += 1 + LINK_SIZE;
1013       new_recursive.after_call = ecode;
1014
1015       /* Now save the offset data. */
1016
1017       new_recursive.saved_max = md->offset_end;
1018       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1019         new_recursive.offset_save = stacksave;
1020       else
1021         {
1022         new_recursive.offset_save =
1023           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1024         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1025         }
1026
1027       memcpy(new_recursive.offset_save, md->offset_vector,
1028             new_recursive.saved_max * sizeof(int));
1029       new_recursive.save_start = mstart;
1030       mstart = eptr;
1031
1032       /* OK, now we can do the recursion. For each top-level alternative we
1033       restore the offset and recursion data. */
1034
1035       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1036       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1037       do
1038         {
1039         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1040           md, ims, eptrb, flags, RM6);
1041         if (rrc == MATCH_MATCH)
1042           {
1043           DPRINTF(("Recursion matched\n"));
1044           md->recursive = new_recursive.prevrec;
1045           if (new_recursive.offset_save != stacksave)
1046             (pcre_free)(new_recursive.offset_save);
1047           RRETURN(MATCH_MATCH);
1048           }
1049         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1050           {
1051           DPRINTF(("Recursion gave error %d\n", rrc));
1052           RRETURN(rrc);
1053           }
1054
1055         md->recursive = &new_recursive;
1056         memcpy(md->offset_vector, new_recursive.offset_save,
1057             new_recursive.saved_max * sizeof(int));
1058         callpat += GET(callpat, 1);
1059         }
1060       while (*callpat == OP_ALT);
1061
1062       DPRINTF(("Recursion didn't match\n"));
1063       md->recursive = new_recursive.prevrec;
1064       if (new_recursive.offset_save != stacksave)
1065         (pcre_free)(new_recursive.offset_save);
1066       RRETURN(MATCH_NOMATCH);
1067       }
1068     /* Control never reaches here */
1069
1070     /* "Once" brackets are like assertion brackets except that after a match,
1071     the point in the subject string is not moved back. Thus there can never be
1072     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1073     Check the alternative branches in turn - the matching won't pass the KET
1074     for this kind of subpattern. If any one branch matches, we carry on as at
1075     the end of a normal bracket, leaving the subject pointer. */
1076
1077     case OP_ONCE:
1078     prev = ecode;
1079     saved_eptr = eptr;
1080
1081     do
1082       {
1083       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1084       if (rrc == MATCH_MATCH) break;
1085       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1086       ecode += GET(ecode,1);
1087       }
1088     while (*ecode == OP_ALT);
1089
1090     /* If hit the end of the group (which could be repeated), fail */
1091
1092     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1093
1094     /* Continue as from after the assertion, updating the offsets high water
1095     mark, since extracts may have been taken. */
1096
1097     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1098
1099     offset_top = md->end_offset_top;
1100     eptr = md->end_match_ptr;
1101
1102     /* For a non-repeating ket, just continue at this level. This also
1103     happens for a repeating ket if no characters were matched in the group.
1104     This is the forcible breaking of infinite loops as implemented in Perl
1105     5.005. If there is an options reset, it will get obeyed in the normal
1106     course of events. */
1107
1108     if (*ecode == OP_KET || eptr == saved_eptr)
1109       {
1110       ecode += 1+LINK_SIZE;
1111       break;
1112       }
1113
1114     /* The repeating kets try the rest of the pattern or restart from the
1115     preceding bracket, in the appropriate order. The second "call" of match()
1116     uses tail recursion, to avoid using another stack frame. We need to reset
1117     any options that changed within the bracket before re-running it, so
1118     check the next opcode. */
1119
1120     if (ecode[1+LINK_SIZE] == OP_OPT)
1121       {
1122       ims = (ims & ~PCRE_IMS) | ecode[4];
1123       DPRINTF(("ims set to %02lx at group repeat\n", ims));
1124       }
1125
1126     if (*ecode == OP_KETRMIN)
1127       {
1128       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1129       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1130       ecode = prev;
1131       flags = 0;
1132       goto TAIL_RECURSE;
1133       }
1134     else  /* OP_KETRMAX */
1135       {
1136       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1137       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1138       ecode += 1 + LINK_SIZE;
1139       flags = 0;
1140       goto TAIL_RECURSE;
1141       }
1142     /* Control never gets here */
1143
1144     /* An alternation is the end of a branch; scan along to find the end of the
1145     bracketed group and go to there. */
1146
1147     case OP_ALT:
1148     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1149     break;
1150
1151     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1152     indicating that it may occur zero times. It may repeat infinitely, or not
1153     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1154     with fixed upper repeat limits are compiled as a number of copies, with the
1155     optional ones preceded by BRAZERO or BRAMINZERO. */
1156
1157     case OP_BRAZERO:
1158       {
1159       next = ecode+1;
1160       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1161       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1162       do next += GET(next,1); while (*next == OP_ALT);
1163       ecode = next + 1 + LINK_SIZE;
1164       }
1165     break;
1166
1167     case OP_BRAMINZERO:
1168       {
1169       next = ecode+1;
1170       do next += GET(next, 1); while (*next == OP_ALT);
1171       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1172       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1173       ecode++;
1174       }
1175     break;
1176
1177     case OP_SKIPZERO:
1178       {
1179       next = ecode+1;
1180       do next += GET(next,1); while (*next == OP_ALT);
1181       ecode = next + 1 + LINK_SIZE;
1182       }
1183     break;
1184
1185     /* End of a group, repeated or non-repeating. */
1186
1187     case OP_KET:
1188     case OP_KETRMIN:
1189     case OP_KETRMAX:
1190     prev = ecode - GET(ecode, 1);
1191
1192     /* If this was a group that remembered the subject start, in order to break
1193     infinite repeats of empty string matches, retrieve the subject start from
1194     the chain. Otherwise, set it NULL. */
1195
1196     if (*prev >= OP_SBRA)
1197       {
1198       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1199       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1200       }
1201     else saved_eptr = NULL;
1202
1203     /* If we are at the end of an assertion group, stop matching and return
1204     MATCH_MATCH, but record the current high water mark for use by positive
1205     assertions. Do this also for the "once" (atomic) groups. */
1206
1207     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1208         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1209         *prev == OP_ONCE)
1210       {
1211       md->end_match_ptr = eptr;      /* For ONCE */
1212       md->end_offset_top = offset_top;
1213       RRETURN(MATCH_MATCH);
1214       }
1215
1216     /* For capturing groups we have to check the group number back at the start
1217     and if necessary complete handling an extraction by setting the offsets and
1218     bumping the high water mark. Note that whole-pattern recursion is coded as
1219     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1220     when the OP_END is reached. Other recursion is handled here. */
1221
1222     if (*prev == OP_CBRA || *prev == OP_SCBRA)
1223       {
1224       number = GET2(prev, 1+LINK_SIZE);
1225       offset = number << 1;
1226
1227 #ifdef DEBUG
1228       printf("end bracket %d", number);
1229       printf("\n");
1230 #endif
1231
1232       md->capture_last = number;
1233       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1234         {
1235         md->offset_vector[offset] =
1236           md->offset_vector[md->offset_end - number];
1237         md->offset_vector[offset+1] = eptr - md->start_subject;
1238         if (offset_top <= offset) offset_top = offset + 2;
1239         }
1240
1241       /* Handle a recursively called group. Restore the offsets
1242       appropriately and continue from after the call. */
1243
1244       if (md->recursive != NULL && md->recursive->group_num == number)
1245         {
1246         recursion_info *rec = md->recursive;
1247         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1248         md->recursive = rec->prevrec;
1249         mstart = rec->save_start;
1250         memcpy(md->offset_vector, rec->offset_save,
1251           rec->saved_max * sizeof(int));
1252         ecode = rec->after_call;
1253         ims = original_ims;
1254         break;
1255         }
1256       }
1257
1258     /* For both capturing and non-capturing groups, reset the value of the ims
1259     flags, in case they got changed during the group. */
1260
1261     ims = original_ims;
1262     DPRINTF(("ims reset to %02lx\n", ims));
1263
1264     /* For a non-repeating ket, just continue at this level. This also
1265     happens for a repeating ket if no characters were matched in the group.
1266     This is the forcible breaking of infinite loops as implemented in Perl
1267     5.005. If there is an options reset, it will get obeyed in the normal
1268     course of events. */
1269
1270     if (*ecode == OP_KET || eptr == saved_eptr)
1271       {
1272       ecode += 1 + LINK_SIZE;
1273       break;
1274       }
1275
1276     /* The repeating kets try the rest of the pattern or restart from the
1277     preceding bracket, in the appropriate order. In the second case, we can use
1278     tail recursion to avoid using another stack frame, unless we have an
1279     unlimited repeat of a group that can match an empty string. */
1280
1281     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1282
1283     if (*ecode == OP_KETRMIN)
1284       {
1285       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1286       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1287       if (flags != 0)    /* Could match an empty string */
1288         {
1289         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1290         RRETURN(rrc);
1291         }
1292       ecode = prev;
1293       goto TAIL_RECURSE;
1294       }
1295     else  /* OP_KETRMAX */
1296       {
1297       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1298       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1299       ecode += 1 + LINK_SIZE;
1300       flags = 0;
1301       goto TAIL_RECURSE;
1302       }
1303     /* Control never gets here */
1304
1305     /* Start of subject unless notbol, or after internal newline if multiline */
1306
1307     case OP_CIRC:
1308     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1309     if ((ims & PCRE_MULTILINE) != 0)
1310       {
1311       if (eptr != md->start_subject &&
1312           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1313         RRETURN(MATCH_NOMATCH);
1314       ecode++;
1315       break;
1316       }
1317     /* ... else fall through */
1318
1319     /* Start of subject assertion */
1320
1321     case OP_SOD:
1322     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1323     ecode++;
1324     break;
1325
1326     /* Start of match assertion */
1327
1328     case OP_SOM:
1329     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1330     ecode++;
1331     break;
1332
1333     /* Reset the start of match point */
1334
1335     case OP_SET_SOM:
1336     mstart = eptr;
1337     ecode++;
1338     break;
1339
1340     /* Assert before internal newline if multiline, or before a terminating
1341     newline unless endonly is set, else end of subject unless noteol is set. */
1342
1343     case OP_DOLL:
1344     if ((ims & PCRE_MULTILINE) != 0)
1345       {
1346       if (eptr < md->end_subject)
1347         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1348       else
1349         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1350       ecode++;
1351       break;
1352       }
1353     else
1354       {
1355       if (md->noteol) RRETURN(MATCH_NOMATCH);
1356       if (!md->endonly)
1357         {
1358         if (eptr != md->end_subject &&
1359             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1360           RRETURN(MATCH_NOMATCH);
1361         ecode++;
1362         break;
1363         }
1364       }
1365     /* ... else fall through for endonly */
1366
1367     /* End of subject assertion (\z) */
1368
1369     case OP_EOD:
1370     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1371     ecode++;
1372     break;
1373
1374     /* End of subject or ending \n assertion (\Z) */
1375
1376     case OP_EODN:
1377     if (eptr != md->end_subject &&
1378         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1379       RRETURN(MATCH_NOMATCH);
1380     ecode++;
1381     break;
1382
1383     /* Word boundary assertions */
1384
1385     case OP_NOT_WORD_BOUNDARY:
1386     case OP_WORD_BOUNDARY:
1387       {
1388
1389       /* Find out if the previous and current characters are "word" characters.
1390       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1391       be "non-word" characters. */
1392
1393 #ifdef SUPPORT_UTF8
1394       if (utf8)
1395         {
1396         if (eptr == md->start_subject) prev_is_word = FALSE; else
1397           {
1398           const uschar *lastptr = eptr - 1;
1399           while((*lastptr & 0xc0) == 0x80) lastptr--;
1400           GETCHAR(c, lastptr);
1401           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1402           }
1403         if (eptr >= md->end_subject) cur_is_word = FALSE; else
1404           {
1405           GETCHAR(c, eptr);
1406           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1407           }
1408         }
1409       else
1410 #endif
1411
1412       /* More streamlined when not in UTF-8 mode */
1413
1414         {
1415         prev_is_word = (eptr != md->start_subject) &&
1416           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1417         cur_is_word = (eptr < md->end_subject) &&
1418           ((md->ctypes[*eptr] & ctype_word) != 0);
1419         }
1420
1421       /* Now see if the situation is what we want */
1422
1423       if ((*ecode++ == OP_WORD_BOUNDARY)?
1424            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1425         RRETURN(MATCH_NOMATCH);
1426       }
1427     break;
1428
1429     /* Match a single character type; inline for speed */
1430
1431     case OP_ANY:
1432     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1433     /* Fall through */
1434
1435     case OP_ALLANY:
1436     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1437     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1438     ecode++;
1439     break;
1440
1441     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1442     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1443
1444     case OP_ANYBYTE:
1445     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1446     ecode++;
1447     break;
1448
1449     case OP_NOT_DIGIT:
1450     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1451     GETCHARINCTEST(c, eptr);
1452     if (
1453 #ifdef SUPPORT_UTF8
1454        c < 256 &&
1455 #endif
1456        (md->ctypes[c] & ctype_digit) != 0
1457        )
1458       RRETURN(MATCH_NOMATCH);
1459     ecode++;
1460     break;
1461
1462     case OP_DIGIT:
1463     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1464     GETCHARINCTEST(c, eptr);
1465     if (
1466 #ifdef SUPPORT_UTF8
1467        c >= 256 ||
1468 #endif
1469        (md->ctypes[c] & ctype_digit) == 0
1470        )
1471       RRETURN(MATCH_NOMATCH);
1472     ecode++;
1473     break;
1474
1475     case OP_NOT_WHITESPACE:
1476     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1477     GETCHARINCTEST(c, eptr);
1478     if (
1479 #ifdef SUPPORT_UTF8
1480        c < 256 &&
1481 #endif
1482        (md->ctypes[c] & ctype_space) != 0
1483        )
1484       RRETURN(MATCH_NOMATCH);
1485     ecode++;
1486     break;
1487
1488     case OP_WHITESPACE:
1489     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1490     GETCHARINCTEST(c, eptr);
1491     if (
1492 #ifdef SUPPORT_UTF8
1493        c >= 256 ||
1494 #endif
1495        (md->ctypes[c] & ctype_space) == 0
1496        )
1497       RRETURN(MATCH_NOMATCH);
1498     ecode++;
1499     break;
1500
1501     case OP_NOT_WORDCHAR:
1502     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1503     GETCHARINCTEST(c, eptr);
1504     if (
1505 #ifdef SUPPORT_UTF8
1506        c < 256 &&
1507 #endif
1508        (md->ctypes[c] & ctype_word) != 0
1509        )
1510       RRETURN(MATCH_NOMATCH);
1511     ecode++;
1512     break;
1513
1514     case OP_WORDCHAR:
1515     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1516     GETCHARINCTEST(c, eptr);
1517     if (
1518 #ifdef SUPPORT_UTF8
1519        c >= 256 ||
1520 #endif
1521        (md->ctypes[c] & ctype_word) == 0
1522        )
1523       RRETURN(MATCH_NOMATCH);
1524     ecode++;
1525     break;
1526
1527     case OP_ANYNL:
1528     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1529     GETCHARINCTEST(c, eptr);
1530     switch(c)
1531       {
1532       default: RRETURN(MATCH_NOMATCH);
1533       case 0x000d:
1534       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1535       break;
1536
1537       case 0x000a:
1538       break;
1539
1540       case 0x000b:
1541       case 0x000c:
1542       case 0x0085:
1543       case 0x2028:
1544       case 0x2029:
1545       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1546       break;
1547       }
1548     ecode++;
1549     break;
1550
1551     case OP_NOT_HSPACE:
1552     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1553     GETCHARINCTEST(c, eptr);
1554     switch(c)
1555       {
1556       default: break;
1557       case 0x09:      /* HT */
1558       case 0x20:      /* SPACE */
1559       case 0xa0:      /* NBSP */
1560       case 0x1680:    /* OGHAM SPACE MARK */
1561       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1562       case 0x2000:    /* EN QUAD */
1563       case 0x2001:    /* EM QUAD */
1564       case 0x2002:    /* EN SPACE */
1565       case 0x2003:    /* EM SPACE */
1566       case 0x2004:    /* THREE-PER-EM SPACE */
1567       case 0x2005:    /* FOUR-PER-EM SPACE */
1568       case 0x2006:    /* SIX-PER-EM SPACE */
1569       case 0x2007:    /* FIGURE SPACE */
1570       case 0x2008:    /* PUNCTUATION SPACE */
1571       case 0x2009:    /* THIN SPACE */
1572       case 0x200A:    /* HAIR SPACE */
1573       case 0x202f:    /* NARROW NO-BREAK SPACE */
1574       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1575       case 0x3000:    /* IDEOGRAPHIC SPACE */
1576       RRETURN(MATCH_NOMATCH);
1577       }
1578     ecode++;
1579     break;
1580
1581     case OP_HSPACE:
1582     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1583     GETCHARINCTEST(c, eptr);
1584     switch(c)
1585       {
1586       default: RRETURN(MATCH_NOMATCH);
1587       case 0x09:      /* HT */
1588       case 0x20:      /* SPACE */
1589       case 0xa0:      /* NBSP */
1590       case 0x1680:    /* OGHAM SPACE MARK */
1591       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1592       case 0x2000:    /* EN QUAD */
1593       case 0x2001:    /* EM QUAD */
1594       case 0x2002:    /* EN SPACE */
1595       case 0x2003:    /* EM SPACE */
1596       case 0x2004:    /* THREE-PER-EM SPACE */
1597       case 0x2005:    /* FOUR-PER-EM SPACE */
1598       case 0x2006:    /* SIX-PER-EM SPACE */
1599       case 0x2007:    /* FIGURE SPACE */
1600       case 0x2008:    /* PUNCTUATION SPACE */
1601       case 0x2009:    /* THIN SPACE */
1602       case 0x200A:    /* HAIR SPACE */
1603       case 0x202f:    /* NARROW NO-BREAK SPACE */
1604       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1605       case 0x3000:    /* IDEOGRAPHIC SPACE */
1606       break;
1607       }
1608     ecode++;
1609     break;
1610
1611     case OP_NOT_VSPACE:
1612     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1613     GETCHARINCTEST(c, eptr);
1614     switch(c)
1615       {
1616       default: break;
1617       case 0x0a:      /* LF */
1618       case 0x0b:      /* VT */
1619       case 0x0c:      /* FF */
1620       case 0x0d:      /* CR */
1621       case 0x85:      /* NEL */
1622       case 0x2028:    /* LINE SEPARATOR */
1623       case 0x2029:    /* PARAGRAPH SEPARATOR */
1624       RRETURN(MATCH_NOMATCH);
1625       }
1626     ecode++;
1627     break;
1628
1629     case OP_VSPACE:
1630     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1631     GETCHARINCTEST(c, eptr);
1632     switch(c)
1633       {
1634       default: RRETURN(MATCH_NOMATCH);
1635       case 0x0a:      /* LF */
1636       case 0x0b:      /* VT */
1637       case 0x0c:      /* FF */
1638       case 0x0d:      /* CR */
1639       case 0x85:      /* NEL */
1640       case 0x2028:    /* LINE SEPARATOR */
1641       case 0x2029:    /* PARAGRAPH SEPARATOR */
1642       break;
1643       }
1644     ecode++;
1645     break;
1646
1647 #ifdef SUPPORT_UCP
1648     /* Check the next character by Unicode property. We will get here only
1649     if the support is in the binary; otherwise a compile-time error occurs. */
1650
1651     case OP_PROP:
1652     case OP_NOTPROP:
1653     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1654     GETCHARINCTEST(c, eptr);
1655       {
1656       int chartype, script;
1657       int category = _pcre_ucp_findprop(c, &chartype, &script);
1658
1659       switch(ecode[1])
1660         {
1661         case PT_ANY:
1662         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1663         break;
1664
1665         case PT_LAMP:
1666         if ((chartype == ucp_Lu ||
1667              chartype == ucp_Ll ||
1668              chartype == ucp_Lt) == (op == OP_NOTPROP))
1669           RRETURN(MATCH_NOMATCH);
1670          break;
1671
1672         case PT_GC:
1673         if ((ecode[2] != category) == (op == OP_PROP))
1674           RRETURN(MATCH_NOMATCH);
1675         break;
1676
1677         case PT_PC:
1678         if ((ecode[2] != chartype) == (op == OP_PROP))
1679           RRETURN(MATCH_NOMATCH);
1680         break;
1681
1682         case PT_SC:
1683         if ((ecode[2] != script) == (op == OP_PROP))
1684           RRETURN(MATCH_NOMATCH);
1685         break;
1686
1687         default:
1688         RRETURN(PCRE_ERROR_INTERNAL);
1689         }
1690
1691       ecode += 3;
1692       }
1693     break;
1694
1695     /* Match an extended Unicode sequence. We will get here only if the support
1696     is in the binary; otherwise a compile-time error occurs. */
1697
1698     case OP_EXTUNI:
1699     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1700     GETCHARINCTEST(c, eptr);
1701       {
1702       int chartype, script;
1703       int category = _pcre_ucp_findprop(c, &chartype, &script);
1704       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1705       while (eptr < md->end_subject)
1706         {
1707         int len = 1;
1708         if (!utf8) c = *eptr; else
1709           {
1710           GETCHARLEN(c, eptr, len);
1711           }
1712         category = _pcre_ucp_findprop(c, &chartype, &script);
1713         if (category != ucp_M) break;
1714         eptr += len;
1715         }
1716       }
1717     ecode++;
1718     break;
1719 #endif
1720
1721
1722     /* Match a back reference, possibly repeatedly. Look past the end of the
1723     item to see if there is repeat information following. The code is similar
1724     to that for character classes, but repeated for efficiency. Then obey
1725     similar code to character type repeats - written out again for speed.
1726     However, if the referenced string is the empty string, always treat
1727     it as matched, any number of times (otherwise there could be infinite
1728     loops). */
1729
1730     case OP_REF:
1731       {
1732       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1733       ecode += 3;
1734
1735       /* If the reference is unset, there are two possibilities:
1736
1737       (a) In the default, Perl-compatible state, set the length to be longer
1738       than the amount of subject left; this ensures that every attempt at a
1739       match fails. We can't just fail here, because of the possibility of
1740       quantifiers with zero minima.
1741
1742       (b) If the JavaScript compatibility flag is set, set the length to zero
1743       so that the back reference matches an empty string.
1744
1745       Otherwise, set the length to the length of what was matched by the
1746       referenced subpattern. */
1747
1748       if (offset >= offset_top || md->offset_vector[offset] < 0)
1749         length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1750       else
1751         length = md->offset_vector[offset+1] - md->offset_vector[offset];
1752
1753       /* Set up for repetition, or handle the non-repeated case */
1754
1755       switch (*ecode)
1756         {
1757         case OP_CRSTAR:
1758         case OP_CRMINSTAR:
1759         case OP_CRPLUS:
1760         case OP_CRMINPLUS:
1761         case OP_CRQUERY:
1762         case OP_CRMINQUERY:
1763         c = *ecode++ - OP_CRSTAR;
1764         minimize = (c & 1) != 0;
1765         min = rep_min[c];                 /* Pick up values from tables; */
1766         max = rep_max[c];                 /* zero for max => infinity */
1767         if (max == 0) max = INT_MAX;
1768         break;
1769
1770         case OP_CRRANGE:
1771         case OP_CRMINRANGE:
1772         minimize = (*ecode == OP_CRMINRANGE);
1773         min = GET2(ecode, 1);
1774         max = GET2(ecode, 3);
1775         if (max == 0) max = INT_MAX;
1776         ecode += 5;
1777         break;
1778
1779         default:               /* No repeat follows */
1780         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1781         eptr += length;
1782         continue;              /* With the main loop */
1783         }
1784
1785       /* If the length of the reference is zero, just continue with the
1786       main loop. */
1787
1788       if (length == 0) continue;
1789
1790       /* First, ensure the minimum number of matches are present. We get back
1791       the length of the reference string explicitly rather than passing the
1792       address of eptr, so that eptr can be a register variable. */
1793
1794       for (i = 1; i <= min; i++)
1795         {
1796         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1797         eptr += length;
1798         }
1799
1800       /* If min = max, continue at the same level without recursion.
1801       They are not both allowed to be zero. */
1802
1803       if (min == max) continue;
1804
1805       /* If minimizing, keep trying and advancing the pointer */
1806
1807       if (minimize)
1808         {
1809         for (fi = min;; fi++)
1810           {
1811           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1812           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1813           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1814             RRETURN(MATCH_NOMATCH);
1815           eptr += length;
1816           }
1817         /* Control never gets here */
1818         }
1819
1820       /* If maximizing, find the longest string and work backwards */
1821
1822       else
1823         {
1824         pp = eptr;
1825         for (i = min; i < max; i++)
1826           {
1827           if (!match_ref(offset, eptr, length, md, ims)) break;
1828           eptr += length;
1829           }
1830         while (eptr >= pp)
1831           {
1832           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1833           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1834           eptr -= length;
1835           }
1836         RRETURN(MATCH_NOMATCH);
1837         }
1838       }
1839     /* Control never gets here */
1840
1841
1842
1843     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1844     used when all the characters in the class have values in the range 0-255,
1845     and either the matching is caseful, or the characters are in the range
1846     0-127 when UTF-8 processing is enabled. The only difference between
1847     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1848     encountered.
1849
1850     First, look past the end of the item to see if there is repeat information
1851     following. Then obey similar code to character type repeats - written out
1852     again for speed. */
1853
1854     case OP_NCLASS:
1855     case OP_CLASS:
1856       {
1857       data = ecode + 1;                /* Save for matching */
1858       ecode += 33;                     /* Advance past the item */
1859
1860       switch (*ecode)
1861         {
1862         case OP_CRSTAR:
1863         case OP_CRMINSTAR:
1864         case OP_CRPLUS:
1865         case OP_CRMINPLUS:
1866         case OP_CRQUERY:
1867         case OP_CRMINQUERY:
1868         c = *ecode++ - OP_CRSTAR;
1869         minimize = (c & 1) != 0;
1870         min = rep_min[c];                 /* Pick up values from tables; */
1871         max = rep_max[c];                 /* zero for max => infinity */
1872         if (max == 0) max = INT_MAX;
1873         break;
1874
1875         case OP_CRRANGE:
1876         case OP_CRMINRANGE:
1877         minimize = (*ecode == OP_CRMINRANGE);
1878         min = GET2(ecode, 1);
1879         max = GET2(ecode, 3);
1880         if (max == 0) max = INT_MAX;
1881         ecode += 5;
1882         break;
1883
1884         default:               /* No repeat follows */
1885         min = max = 1;
1886         break;
1887         }
1888
1889       /* First, ensure the minimum number of matches are present. */
1890
1891 #ifdef SUPPORT_UTF8
1892       /* UTF-8 mode */
1893       if (utf8)
1894         {
1895         for (i = 1; i <= min; i++)
1896           {
1897           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1898           GETCHARINC(c, eptr);
1899           if (c > 255)
1900             {
1901             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1902             }
1903           else
1904             {
1905             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1906             }
1907           }
1908         }
1909       else
1910 #endif
1911       /* Not UTF-8 mode */
1912         {
1913         for (i = 1; i <= min; i++)
1914           {
1915           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1916           c = *eptr++;
1917           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1918           }
1919         }
1920
1921       /* If max == min we can continue with the main loop without the
1922       need to recurse. */
1923
1924       if (min == max) continue;
1925
1926       /* If minimizing, keep testing the rest of the expression and advancing
1927       the pointer while it matches the class. */
1928
1929       if (minimize)
1930         {
1931 #ifdef SUPPORT_UTF8
1932         /* UTF-8 mode */
1933         if (utf8)
1934           {
1935           for (fi = min;; fi++)
1936             {
1937             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1938             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1939             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1940             GETCHARINC(c, eptr);
1941             if (c > 255)
1942               {
1943               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1944               }
1945             else
1946               {
1947               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1948               }
1949             }
1950           }
1951         else
1952 #endif
1953         /* Not UTF-8 mode */
1954           {
1955           for (fi = min;; fi++)
1956             {
1957             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1958             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1959             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1960             c = *eptr++;
1961             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1962             }
1963           }
1964         /* Control never gets here */
1965         }
1966
1967       /* If maximizing, find the longest possible run, then work backwards. */
1968
1969       else
1970         {
1971         pp = eptr;
1972
1973 #ifdef SUPPORT_UTF8
1974         /* UTF-8 mode */
1975         if (utf8)
1976           {
1977           for (i = min; i < max; i++)
1978             {
1979             int len = 1;
1980             if (eptr >= md->end_subject) break;
1981             GETCHARLEN(c, eptr, len);
1982             if (c > 255)
1983               {
1984               if (op == OP_CLASS) break;
1985               }
1986             else
1987               {
1988               if ((data[c/8] & (1 << (c&7))) == 0) break;
1989               }
1990             eptr += len;
1991             }
1992           for (;;)
1993             {
1994             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1995             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1996             if (eptr-- == pp) break;        /* Stop if tried at original pos */
1997             BACKCHAR(eptr);
1998             }
1999           }
2000         else
2001 #endif
2002           /* Not UTF-8 mode */
2003           {
2004           for (i = min; i < max; i++)
2005             {
2006             if (eptr >= md->end_subject) break;
2007             c = *eptr;
2008             if ((data[c/8] & (1 << (c&7))) == 0) break;
2009             eptr++;
2010             }
2011           while (eptr >= pp)
2012             {
2013             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2014             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2015             eptr--;
2016             }
2017           }
2018
2019         RRETURN(MATCH_NOMATCH);
2020         }
2021       }
2022     /* Control never gets here */
2023
2024
2025     /* Match an extended character class. This opcode is encountered only
2026     in UTF-8 mode, because that's the only time it is compiled. */
2027
2028 #ifdef SUPPORT_UTF8
2029     case OP_XCLASS:
2030       {
2031       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
2032       ecode += GET(ecode, 1);                      /* Advance past the item */
2033
2034       switch (*ecode)
2035         {
2036         case OP_CRSTAR:
2037         case OP_CRMINSTAR:
2038         case OP_CRPLUS:
2039         case OP_CRMINPLUS:
2040         case OP_CRQUERY:
2041         case OP_CRMINQUERY:
2042         c = *ecode++ - OP_CRSTAR;
2043         minimize = (c & 1) != 0;
2044         min = rep_min[c];                 /* Pick up values from tables; */
2045         max = rep_max[c];                 /* zero for max => infinity */
2046         if (max == 0) max = INT_MAX;
2047         break;
2048
2049         case OP_CRRANGE:
2050         case OP_CRMINRANGE:
2051         minimize = (*ecode == OP_CRMINRANGE);
2052         min = GET2(ecode, 1);
2053         max = GET2(ecode, 3);
2054         if (max == 0) max = INT_MAX;
2055         ecode += 5;
2056         break;
2057
2058         default:               /* No repeat follows */
2059         min = max = 1;
2060         break;
2061         }
2062
2063       /* First, ensure the minimum number of matches are present. */
2064
2065       for (i = 1; i <= min; i++)
2066         {
2067         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2068         GETCHARINC(c, eptr);
2069         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2070         }
2071
2072       /* If max == min we can continue with the main loop without the
2073       need to recurse. */
2074
2075       if (min == max) continue;
2076
2077       /* If minimizing, keep testing the rest of the expression and advancing
2078       the pointer while it matches the class. */
2079
2080       if (minimize)
2081         {
2082         for (fi = min;; fi++)
2083           {
2084           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2085           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2086           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2087           GETCHARINC(c, eptr);
2088           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2089           }
2090         /* Control never gets here */
2091         }
2092
2093       /* If maximizing, find the longest possible run, then work backwards. */
2094
2095       else
2096         {
2097         pp = eptr;
2098         for (i = min; i < max; i++)
2099           {
2100           int len = 1;
2101           if (eptr >= md->end_subject) break;
2102           GETCHARLEN(c, eptr, len);
2103           if (!_pcre_xclass(c, data)) break;
2104           eptr += len;
2105           }
2106         for(;;)
2107           {
2108           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2109           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2110           if (eptr-- == pp) break;        /* Stop if tried at original pos */
2111           if (utf8) BACKCHAR(eptr);
2112           }
2113         RRETURN(MATCH_NOMATCH);
2114         }
2115
2116       /* Control never gets here */
2117       }
2118 #endif    /* End of XCLASS */
2119
2120     /* Match a single character, casefully */
2121
2122     case OP_CHAR:
2123 #ifdef SUPPORT_UTF8
2124     if (utf8)
2125       {
2126       length = 1;
2127       ecode++;
2128       GETCHARLEN(fc, ecode, length);
2129       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2130       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2131       }
2132     else
2133 #endif
2134
2135     /* Non-UTF-8 mode */
2136       {
2137       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2138       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2139       ecode += 2;
2140       }
2141     break;
2142
2143     /* Match a single character, caselessly */
2144
2145     case OP_CHARNC:
2146 #ifdef SUPPORT_UTF8
2147     if (utf8)
2148       {
2149       length = 1;
2150       ecode++;
2151       GETCHARLEN(fc, ecode, length);
2152
2153       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2154
2155       /* If the pattern character's value is < 128, we have only one byte, and
2156       can use the fast lookup table. */
2157
2158       if (fc < 128)
2159         {
2160         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2161         }
2162
2163       /* Otherwise we must pick up the subject character */
2164
2165       else
2166         {
2167         unsigned int dc;
2168         GETCHARINC(dc, eptr);
2169         ecode += length;
2170
2171         /* If we have Unicode property support, we can use it to test the other
2172         case of the character, if there is one. */
2173
2174         if (fc != dc)
2175           {
2176 #ifdef SUPPORT_UCP
2177           if (dc != _pcre_ucp_othercase(fc))
2178 #endif
2179             RRETURN(MATCH_NOMATCH);
2180           }
2181         }
2182       }
2183     else
2184 #endif   /* SUPPORT_UTF8 */
2185
2186     /* Non-UTF-8 mode */
2187       {
2188       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2189       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2190       ecode += 2;
2191       }
2192     break;
2193
2194     /* Match a single character repeatedly. */
2195
2196     case OP_EXACT:
2197     min = max = GET2(ecode, 1);
2198     ecode += 3;
2199     goto REPEATCHAR;
2200
2201     case OP_POSUPTO:
2202     possessive = TRUE;
2203     /* Fall through */
2204
2205     case OP_UPTO:
2206     case OP_MINUPTO:
2207     min = 0;
2208     max = GET2(ecode, 1);
2209     minimize = *ecode == OP_MINUPTO;
2210     ecode += 3;
2211     goto REPEATCHAR;
2212
2213     case OP_POSSTAR:
2214     possessive = TRUE;
2215     min = 0;
2216     max = INT_MAX;
2217     ecode++;
2218     goto REPEATCHAR;
2219
2220     case OP_POSPLUS:
2221     possessive = TRUE;
2222     min = 1;
2223     max = INT_MAX;
2224     ecode++;
2225     goto REPEATCHAR;
2226
2227     case OP_POSQUERY:
2228     possessive = TRUE;
2229     min = 0;
2230     max = 1;
2231     ecode++;
2232     goto REPEATCHAR;
2233
2234     case OP_STAR:
2235     case OP_MINSTAR:
2236     case OP_PLUS:
2237     case OP_MINPLUS:
2238     case OP_QUERY:
2239     case OP_MINQUERY:
2240     c = *ecode++ - OP_STAR;
2241     minimize = (c & 1) != 0;
2242     min = rep_min[c];                 /* Pick up values from tables; */
2243     max = rep_max[c];                 /* zero for max => infinity */
2244     if (max == 0) max = INT_MAX;
2245
2246     /* Common code for all repeated single-character matches. We can give
2247     up quickly if there are fewer than the minimum number of characters left in
2248     the subject. */
2249
2250     REPEATCHAR:
2251 #ifdef SUPPORT_UTF8
2252     if (utf8)
2253       {
2254       length = 1;
2255       charptr = ecode;
2256       GETCHARLEN(fc, ecode, length);
2257       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2258       ecode += length;
2259
2260       /* Handle multibyte character matching specially here. There is
2261       support for caseless matching if UCP support is present. */
2262
2263       if (length > 1)
2264         {
2265 #ifdef SUPPORT_UCP
2266         unsigned int othercase;
2267         if ((ims & PCRE_CASELESS) != 0 &&
2268             (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2269           oclength = _pcre_ord2utf8(othercase, occhars);
2270         else oclength = 0;
2271 #endif  /* SUPPORT_UCP */
2272
2273         for (i = 1; i <= min; i++)
2274           {
2275           if (memcmp(eptr, charptr, length) == 0) eptr += length;
2276 #ifdef SUPPORT_UCP
2277           /* Need braces because of following else */
2278           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2279           else
2280             {
2281             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2282             eptr += oclength;
2283             }
2284 #else   /* without SUPPORT_UCP */
2285           else { RRETURN(MATCH_NOMATCH); }
2286 #endif  /* SUPPORT_UCP */
2287           }
2288
2289         if (min == max) continue;
2290
2291         if (minimize)
2292           {
2293           for (fi = min;; fi++)
2294             {
2295             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2296             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2297             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2298             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2299 #ifdef SUPPORT_UCP
2300             /* Need braces because of following else */
2301             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2302             else
2303               {
2304               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2305               eptr += oclength;
2306               }
2307 #else   /* without SUPPORT_UCP */
2308             else { RRETURN (MATCH_NOMATCH); }
2309 #endif  /* SUPPORT_UCP */
2310             }
2311           /* Control never gets here */
2312           }
2313
2314         else  /* Maximize */
2315           {
2316           pp = eptr;
2317           for (i = min; i < max; i++)
2318             {
2319             if (eptr > md->end_subject - length) break;
2320             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2321 #ifdef SUPPORT_UCP
2322             else if (oclength == 0) break;
2323             else
2324               {
2325               if (memcmp(eptr, occhars, oclength) != 0) break;
2326               eptr += oclength;
2327               }
2328 #else   /* without SUPPORT_UCP */
2329             else break;
2330 #endif  /* SUPPORT_UCP */
2331             }
2332
2333           if (possessive) continue;
2334           for(;;)
2335            {
2336            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2337            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2338            if (eptr == pp) RRETURN(MATCH_NOMATCH);
2339 #ifdef SUPPORT_UCP
2340            eptr--;
2341            BACKCHAR(eptr);
2342 #else   /* without SUPPORT_UCP */
2343            eptr -= length;
2344 #endif  /* SUPPORT_UCP */
2345            }
2346           }
2347         /* Control never gets here */
2348         }
2349
2350       /* If the length of a UTF-8 character is 1, we fall through here, and
2351       obey the code as for non-UTF-8 characters below, though in this case the
2352       value of fc will always be < 128. */
2353       }
2354     else
2355 #endif  /* SUPPORT_UTF8 */
2356
2357     /* When not in UTF-8 mode, load a single-byte character. */
2358       {
2359       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2360       fc = *ecode++;
2361       }
2362
2363     /* The value of fc at this point is always less than 256, though we may or
2364     may not be in UTF-8 mode. The code is duplicated for the caseless and
2365     caseful cases, for speed, since matching characters is likely to be quite
2366     common. First, ensure the minimum number of matches are present. If min =
2367     max, continue at the same level without recursing. Otherwise, if
2368     minimizing, keep trying the rest of the expression and advancing one
2369     matching character if failing, up to the maximum. Alternatively, if
2370     maximizing, find the maximum number of characters and work backwards. */
2371
2372     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2373       max, eptr));
2374
2375     if ((ims & PCRE_CASELESS) != 0)
2376       {
2377       fc = md->lcc[fc];
2378       for (i = 1; i <= min; i++)
2379         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2380       if (min == max) continue;
2381       if (minimize)
2382         {
2383         for (fi = min;; fi++)
2384           {
2385           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2386           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2387           if (fi >= max || eptr >= md->end_subject ||
2388               fc != md->lcc[*eptr++])
2389             RRETURN(MATCH_NOMATCH);
2390           }
2391         /* Control never gets here */
2392         }
2393       else  /* Maximize */
2394         {
2395         pp = eptr;
2396         for (i = min; i < max; i++)
2397           {
2398           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2399           eptr++;
2400           }
2401         if (possessive) continue;
2402         while (eptr >= pp)
2403           {
2404           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2405           eptr--;
2406           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2407           }
2408         RRETURN(MATCH_NOMATCH);
2409         }
2410       /* Control never gets here */
2411       }
2412
2413     /* Caseful comparisons (includes all multi-byte characters) */
2414
2415     else
2416       {
2417       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2418       if (min == max) continue;
2419       if (minimize)
2420         {
2421         for (fi = min;; fi++)
2422           {
2423           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2424           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2425           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2426             RRETURN(MATCH_NOMATCH);
2427           }
2428         /* Control never gets here */
2429         }
2430       else  /* Maximize */
2431         {
2432         pp = eptr;
2433         for (i = min; i < max; i++)
2434           {
2435           if (eptr >= md->end_subject || fc != *eptr) break;
2436           eptr++;
2437           }
2438         if (possessive) continue;
2439         while (eptr >= pp)
2440           {
2441           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2442           eptr--;
2443           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2444           }
2445         RRETURN(MATCH_NOMATCH);
2446         }
2447       }
2448     /* Control never gets here */
2449
2450     /* Match a negated single one-byte character. The character we are
2451     checking can be multibyte. */
2452
2453     case OP_NOT:
2454     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2455     ecode++;
2456     GETCHARINCTEST(c, eptr);
2457     if ((ims & PCRE_CASELESS) != 0)
2458       {
2459 #ifdef SUPPORT_UTF8
2460       if (c < 256)
2461 #endif
2462       c = md->lcc[c];
2463       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2464       }
2465     else
2466       {
2467       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2468       }
2469     break;
2470
2471     /* Match a negated single one-byte character repeatedly. This is almost a
2472     repeat of the code for a repeated single character, but I haven't found a
2473     nice way of commoning these up that doesn't require a test of the
2474     positive/negative option for each character match. Maybe that wouldn't add
2475     very much to the time taken, but character matching *is* what this is all
2476     about... */
2477
2478     case OP_NOTEXACT:
2479     min = max = GET2(ecode, 1);
2480     ecode += 3;
2481     goto REPEATNOTCHAR;
2482
2483     case OP_NOTUPTO:
2484     case OP_NOTMINUPTO:
2485     min = 0;
2486     max = GET2(ecode, 1);
2487     minimize = *ecode == OP_NOTMINUPTO;
2488     ecode += 3;
2489     goto REPEATNOTCHAR;
2490
2491     case OP_NOTPOSSTAR:
2492     possessive = TRUE;
2493     min = 0;
2494     max = INT_MAX;
2495     ecode++;
2496     goto REPEATNOTCHAR;
2497
2498     case OP_NOTPOSPLUS:
2499     possessive = TRUE;
2500     min = 1;
2501     max = INT_MAX;
2502     ecode++;
2503     goto REPEATNOTCHAR;
2504
2505     case OP_NOTPOSQUERY:
2506     possessive = TRUE;
2507     min = 0;
2508     max = 1;
2509     ecode++;
2510     goto REPEATNOTCHAR;
2511
2512     case OP_NOTPOSUPTO:
2513     possessive = TRUE;
2514     min = 0;
2515     max = GET2(ecode, 1);
2516     ecode += 3;
2517     goto REPEATNOTCHAR;
2518
2519     case OP_NOTSTAR:
2520     case OP_NOTMINSTAR:
2521     case OP_NOTPLUS:
2522     case OP_NOTMINPLUS:
2523     case OP_NOTQUERY:
2524     case OP_NOTMINQUERY:
2525     c = *ecode++ - OP_NOTSTAR;
2526     minimize = (c & 1) != 0;
2527     min = rep_min[c];                 /* Pick up values from tables; */
2528     max = rep_max[c];                 /* zero for max => infinity */
2529     if (max == 0) max = INT_MAX;
2530
2531     /* Common code for all repeated single-byte matches. We can give up quickly
2532     if there are fewer than the minimum number of bytes left in the
2533     subject. */
2534
2535     REPEATNOTCHAR:
2536     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2537     fc = *ecode++;
2538
2539     /* The code is duplicated for the caseless and caseful cases, for speed,
2540     since matching characters is likely to be quite common. First, ensure the
2541     minimum number of matches are present. If min = max, continue at the same
2542     level without recursing. Otherwise, if minimizing, keep trying the rest of
2543     the expression and advancing one matching character if failing, up to the
2544     maximum. Alternatively, if maximizing, find the maximum number of
2545     characters and work backwards. */
2546
2547     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2548       max, eptr));
2549
2550     if ((ims & PCRE_CASELESS) != 0)
2551       {
2552       fc = md->lcc[fc];
2553
2554 #ifdef SUPPORT_UTF8
2555       /* UTF-8 mode */
2556       if (utf8)
2557         {
2558         register unsigned int d;
2559         for (i = 1; i <= min; i++)
2560           {
2561           GETCHARINC(d, eptr);
2562           if (d < 256) d = md->lcc[d];
2563           if (fc == d) RRETURN(MATCH_NOMATCH);
2564           }
2565         }
2566       else
2567 #endif
2568
2569       /* Not UTF-8 mode */
2570         {
2571         for (i = 1; i <= min; i++)
2572           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2573         }
2574
2575       if (min == max) continue;
2576
2577       if (minimize)
2578         {
2579 #ifdef SUPPORT_UTF8
2580         /* UTF-8 mode */
2581         if (utf8)
2582           {
2583           register unsigned int d;
2584           for (fi = min;; fi++)
2585             {
2586             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2587             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2588             GETCHARINC(d, eptr);
2589             if (d < 256) d = md->lcc[d];
2590             if (fi >= max || eptr >= md->end_subject || fc == d)
2591               RRETURN(MATCH_NOMATCH);
2592             }
2593           }
2594         else
2595 #endif
2596         /* Not UTF-8 mode */
2597           {
2598           for (fi = min;; fi++)
2599             {
2600             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2601             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2602             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2603               RRETURN(MATCH_NOMATCH);
2604             }
2605           }
2606         /* Control never gets here */
2607         }
2608
2609       /* Maximize case */
2610
2611       else
2612         {
2613         pp = eptr;
2614
2615 #ifdef SUPPORT_UTF8
2616         /* UTF-8 mode */
2617         if (utf8)
2618           {
2619           register unsigned int d;
2620           for (i = min; i < max; i++)
2621             {
2622             int len = 1;
2623             if (eptr >= md->end_subject) break;
2624             GETCHARLEN(d, eptr, len);
2625             if (d < 256) d = md->lcc[d];
2626             if (fc == d) break;
2627             eptr += len;
2628             }
2629         if (possessive) continue;
2630         for(;;)
2631             {
2632             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2633             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2634             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2635             BACKCHAR(eptr);
2636             }
2637           }
2638         else
2639 #endif
2640         /* Not UTF-8 mode */
2641           {
2642           for (i = min; i < max; i++)
2643             {
2644             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2645             eptr++;
2646             }
2647           if (possessive) continue;
2648           while (eptr >= pp)
2649             {
2650             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2651             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2652             eptr--;
2653             }
2654           }
2655
2656         RRETURN(MATCH_NOMATCH);
2657         }
2658       /* Control never gets here */
2659       }
2660
2661     /* Caseful comparisons */
2662
2663     else
2664       {
2665 #ifdef SUPPORT_UTF8
2666       /* UTF-8 mode */
2667       if (utf8)
2668         {
2669         register unsigned int d;
2670         for (i = 1; i <= min; i++)
2671           {
2672           GETCHARINC(d, eptr);
2673           if (fc == d) RRETURN(MATCH_NOMATCH);
2674           }
2675         }
2676       else
2677 #endif
2678       /* Not UTF-8 mode */
2679         {
2680         for (i = 1; i <= min; i++)
2681           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2682         }
2683
2684       if (min == max) continue;
2685
2686       if (minimize)
2687         {
2688 #ifdef SUPPORT_UTF8
2689         /* UTF-8 mode */
2690         if (utf8)
2691           {
2692           register unsigned int d;
2693           for (fi = min;; fi++)
2694             {
2695             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2696             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2697             GETCHARINC(d, eptr);
2698             if (fi >= max || eptr >= md->end_subject || fc == d)
2699               RRETURN(MATCH_NOMATCH);
2700             }
2701           }
2702         else
2703 #endif
2704         /* Not UTF-8 mode */
2705           {
2706           for (fi = min;; fi++)
2707             {
2708             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2709             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2710             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2711               RRETURN(MATCH_NOMATCH);
2712             }
2713           }
2714         /* Control never gets here */
2715         }
2716
2717       /* Maximize case */
2718
2719       else
2720         {
2721         pp = eptr;
2722
2723 #ifdef SUPPORT_UTF8
2724         /* UTF-8 mode */
2725         if (utf8)
2726           {
2727           register unsigned int d;
2728           for (i = min; i < max; i++)
2729             {
2730             int len = 1;
2731             if (eptr >= md->end_subject) break;
2732             GETCHARLEN(d, eptr, len);
2733             if (fc == d) break;
2734             eptr += len;
2735             }
2736           if (possessive) continue;
2737           for(;;)
2738             {
2739             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2740             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2741             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2742             BACKCHAR(eptr);
2743             }
2744           }
2745         else
2746 #endif
2747         /* Not UTF-8 mode */
2748           {
2749           for (i = min; i < max; i++)
2750             {
2751             if (eptr >= md->end_subject || fc == *eptr) break;
2752             eptr++;
2753             }
2754           if (possessive) continue;
2755           while (eptr >= pp)
2756             {
2757             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2758             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2759             eptr--;
2760             }
2761           }
2762
2763         RRETURN(MATCH_NOMATCH);
2764         }
2765       }
2766     /* Control never gets here */
2767
2768     /* Match a single character type repeatedly; several different opcodes
2769     share code. This is very similar to the code for single characters, but we
2770     repeat it in the interests of efficiency. */
2771
2772     case OP_TYPEEXACT:
2773     min = max = GET2(ecode, 1);
2774     minimize = TRUE;
2775     ecode += 3;
2776     goto REPEATTYPE;
2777
2778     case OP_TYPEUPTO:
2779     case OP_TYPEMINUPTO:
2780     min = 0;
2781     max = GET2(ecode, 1);
2782     minimize = *ecode == OP_TYPEMINUPTO;
2783     ecode += 3;
2784     goto REPEATTYPE;
2785
2786     case OP_TYPEPOSSTAR:
2787     possessive = TRUE;
2788     min = 0;
2789     max = INT_MAX;
2790     ecode++;
2791     goto REPEATTYPE;
2792
2793     case OP_TYPEPOSPLUS:
2794     possessive = TRUE;
2795     min = 1;
2796     max = INT_MAX;
2797     ecode++;
2798     goto REPEATTYPE;
2799
2800     case OP_TYPEPOSQUERY:
2801     possessive = TRUE;
2802     min = 0;
2803     max = 1;
2804     ecode++;
2805     goto REPEATTYPE;
2806
2807     case OP_TYPEPOSUPTO:
2808     possessive = TRUE;
2809     min = 0;
2810     max = GET2(ecode, 1);
2811     ecode += 3;
2812     goto REPEATTYPE;
2813
2814     case OP_TYPESTAR:
2815     case OP_TYPEMINSTAR:
2816     case OP_TYPEPLUS:
2817     case OP_TYPEMINPLUS:
2818     case OP_TYPEQUERY:
2819     case OP_TYPEMINQUERY:
2820     c = *ecode++ - OP_TYPESTAR;
2821     minimize = (c & 1) != 0;
2822     min = rep_min[c];                 /* Pick up values from tables; */
2823     max = rep_max[c];                 /* zero for max => infinity */
2824     if (max == 0) max = INT_MAX;
2825
2826     /* Common code for all repeated single character type matches. In UTF-8
2827     mode '.' matches a character of any length, as do most other types; only
2828     the minimum-count digit, whitespace and wordchar tests assume one byte. */
2829
2830     REPEATTYPE:
2831     ctype = *ecode++;      /* Code for the character type */
2832
2833 #ifdef SUPPORT_UCP
2834     if (ctype == OP_PROP || ctype == OP_NOTPROP)
2835       {
2836       prop_fail_result = ctype == OP_NOTPROP;
2837       prop_type = *ecode++;
2838       prop_value = *ecode++;
2839       }
2840     else prop_type = -1;
2841 #endif
2842
2843     /* First, ensure the minimum number of matches are present. Use inline
2844     code for maximizing the speed, and do the type test once at the start
2845     (i.e. keep it out of the loop). Also we can test that there are at least
2846     the minimum number of bytes before we start. This isn't as effective in
2847     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2848     is tidier. Also separate the UCP code, which can be the same for both UTF-8
2849     and single-bytes. */
2850
2851     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2852     if (min > 0)
2853       {
2854 #ifdef SUPPORT_UCP
2855       if (prop_type >= 0)
2856         {
2857         switch(prop_type)
2858           {
2859           case PT_ANY:
2860           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2861           for (i = 1; i <= min; i++)
2862             {
2863             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2864             GETCHARINCTEST(c, eptr);
2865             }
2866           break;
2867
2868           case PT_LAMP:
2869           for (i = 1; i <= min; i++)
2870             {
2871             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2872             GETCHARINCTEST(c, eptr);
2873             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2874             if ((prop_chartype == ucp_Lu ||
2875                  prop_chartype == ucp_Ll ||
2876                  prop_chartype == ucp_Lt) == prop_fail_result)
2877               RRETURN(MATCH_NOMATCH);
2878             }
2879           break;
2880
2881           case PT_GC:
2882           for (i = 1; i <= min; i++)
2883             {
2884             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2885             GETCHARINCTEST(c, eptr);
2886             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2887             if ((prop_category == prop_value) == prop_fail_result)
2888               RRETURN(MATCH_NOMATCH);
2889             }
2890           break;
2891
2892           case PT_PC:
2893           for (i = 1; i <= min; i++)
2894             {
2895             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2896             GETCHARINCTEST(c, eptr);
2897             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2898             if ((prop_chartype == prop_value) == prop_fail_result)
2899               RRETURN(MATCH_NOMATCH);
2900             }
2901           break;
2902
2903           case PT_SC:
2904           for (i = 1; i <= min; i++)
2905             {
2906             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2907             GETCHARINCTEST(c, eptr);
2908             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2909             if ((prop_script == prop_value) == prop_fail_result)
2910               RRETURN(MATCH_NOMATCH);
2911             }
2912           break;
2913
2914           default:
2915           RRETURN(PCRE_ERROR_INTERNAL);
2916           }
2917         }
2918
2919       /* Match extended Unicode sequences. We will get here only if the
2920       support is in the binary; otherwise a compile-time error occurs. */
2921
2922       else if (ctype == OP_EXTUNI)
2923         {
2924         for (i = 1; i <= min; i++)
2925           {
2926           GETCHARINCTEST(c, eptr);
2927           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2928           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2929           while (eptr < md->end_subject)
2930             {
2931             int len = 1;
2932             if (!utf8) c = *eptr; else
2933               {
2934               GETCHARLEN(c, eptr, len);
2935               }
2936             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2937             if (prop_category != ucp_M) break;
2938             eptr += len;
2939             }
2940           }
2941         }
2942
2943       else
2944 #endif     /* SUPPORT_UCP */
2945
2946 /* Handle all other cases when the coding is UTF-8 */
2947
2948 #ifdef SUPPORT_UTF8
2949       if (utf8) switch(ctype)
2950         {
2951         case OP_ANY:
2952         for (i = 1; i <= min; i++)
2953           {
2954           if (eptr >= md->end_subject || IS_NEWLINE(eptr))
2955             RRETURN(MATCH_NOMATCH);
2956           eptr++;
2957           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2958           }
2959         break;
2960
2961         case OP_ALLANY:
2962         for (i = 1; i <= min; i++)
2963           {
2964           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2965           eptr++;
2966           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2967           }
2968         break;
2969
2970         case OP_ANYBYTE:
2971         eptr += min;
2972         break;
2973
2974         case OP_ANYNL:
2975         for (i = 1; i <= min; i++)
2976           {
2977           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2978           GETCHARINC(c, eptr);
2979           switch(c)
2980             {
2981             default: RRETURN(MATCH_NOMATCH);
2982             case 0x000d:
2983             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2984             break;
2985
2986             case 0x000a:
2987             break;
2988
2989             case 0x000b:
2990             case 0x000c:
2991             case 0x0085:
2992             case 0x2028:
2993             case 0x2029:
2994             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2995             break;
2996             }
2997           }
2998         break;
2999
3000         case OP_NOT_HSPACE:
3001         for (i = 1; i <= min; i++)
3002           {
3003           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3004           GETCHARINC(c, eptr);
3005           switch(c)
3006             {
3007             default: break;
3008             case 0x09:      /* HT */
3009             case 0x20:      /* SPACE */
3010             case 0xa0:      /* NBSP */
3011             case 0x1680:    /* OGHAM SPACE MARK */
3012             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3013             case 0x2000:    /* EN QUAD */
3014             case 0x2001:    /* EM QUAD */
3015             case 0x2002:    /* EN SPACE */
3016             case 0x2003:    /* EM SPACE */
3017             case 0x2004:    /* THREE-PER-EM SPACE */
3018             case 0x2005:    /* FOUR-PER-EM SPACE */
3019             case 0x2006:    /* SIX-PER-EM SPACE */
3020             case 0x2007:    /* FIGURE SPACE */
3021             case 0x2008:    /* PUNCTUATION SPACE */
3022             case 0x2009:    /* THIN SPACE */
3023             case 0x200A:    /* HAIR SPACE */
3024             case 0x202f:    /* NARROW NO-BREAK SPACE */
3025             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3026             case 0x3000:    /* IDEOGRAPHIC SPACE */
3027             RRETURN(MATCH_NOMATCH);
3028             }
3029           }
3030         break;
3031
3032         case OP_HSPACE:
3033         for (i = 1; i <= min; i++)
3034           {
3035           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3036           GETCHARINC(c, eptr);
3037           switch(c)
3038             {
3039             default: RRETURN(MATCH_NOMATCH);
3040             case 0x09:      /* HT */
3041             case 0x20:      /* SPACE */
3042             case 0xa0:      /* NBSP */
3043             case 0x1680:    /* OGHAM SPACE MARK */
3044             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3045             case 0x2000:    /* EN QUAD */
3046             case 0x2001:    /* EM QUAD */
3047             case 0x2002:    /* EN SPACE */
3048             case 0x2003:    /* EM SPACE */
3049             case 0x2004:    /* THREE-PER-EM SPACE */
3050             case 0x2005:    /* FOUR-PER-EM SPACE */
3051             case 0x2006:    /* SIX-PER-EM SPACE */
3052             case 0x2007:    /* FIGURE SPACE */
3053             case 0x2008:    /* PUNCTUATION SPACE */
3054             case 0x2009:    /* THIN SPACE */
3055             case 0x200A:    /* HAIR SPACE */
3056             case 0x202f:    /* NARROW NO-BREAK SPACE */
3057             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3058             case 0x3000:    /* IDEOGRAPHIC SPACE */
3059             break;
3060             }
3061           }
3062         break;
3063
3064         case OP_NOT_VSPACE:
3065         for (i = 1; i <= min; i++)
3066           {
3067           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3068           GETCHARINC(c, eptr);
3069           switch(c)
3070             {
3071             default: break;
3072             case 0x0a:      /* LF */
3073             case 0x0b:      /* VT */
3074             case 0x0c:      /* FF */
3075             case 0x0d:      /* CR */
3076             case 0x85:      /* NEL */
3077             case 0x2028:    /* LINE SEPARATOR */
3078             case 0x2029:    /* PARAGRAPH SEPARATOR */
3079             RRETURN(MATCH_NOMATCH);
3080             }
3081           }
3082         break;
3083
3084         case OP_VSPACE:
3085         for (i = 1; i <= min; i++)
3086           {
3087           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3088           GETCHARINC(c, eptr);
3089           switch(c)
3090             {
3091             default: RRETURN(MATCH_NOMATCH);
3092             case 0x0a:      /* LF */
3093             case 0x0b:      /* VT */
3094             case 0x0c:      /* FF */
3095             case 0x0d:      /* CR */
3096             case 0x85:      /* NEL */
3097             case 0x2028:    /* LINE SEPARATOR */
3098             case 0x2029:    /* PARAGRAPH SEPARATOR */
3099             break;
3100             }
3101           }
3102         break;
3103
3104         case OP_NOT_DIGIT:
3105         for (i = 1; i <= min; i++)
3106           {
3107           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3108           GETCHARINC(c, eptr);
3109           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3110             RRETURN(MATCH_NOMATCH);
3111           }
3112         break;
3113
3114         case OP_DIGIT:
3115         for (i = 1; i <= min; i++)
3116           {
3117           if (eptr >= md->end_subject ||
3118              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3119             RRETURN(MATCH_NOMATCH);
3120           /* No need to skip more bytes - we know it's a 1-byte character */
3121           }
3122         break;
3123
3124         case OP_NOT_WHITESPACE:
3125         for (i = 1; i <= min; i++)
3126           {
3127           if (eptr >= md->end_subject ||
3128              (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3129             RRETURN(MATCH_NOMATCH);
3130           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3131           }
3132         break;
3133
3134         case OP_WHITESPACE:
3135         for (i = 1; i <= min; i++)
3136           {
3137           if (eptr >= md->end_subject ||
3138              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3139             RRETURN(MATCH_NOMATCH);
3140           /* No need to skip more bytes - we know it's a 1-byte character */
3141           }
3142         break;
3143
3144         case OP_NOT_WORDCHAR:
3145         for (i = 1; i <= min; i++)
3146           {
3147           if (eptr >= md->end_subject ||
3148              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3149             RRETURN(MATCH_NOMATCH);
3150           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3151           }
3152         break;
3153
3154         case OP_WORDCHAR:
3155         for (i = 1; i <= min; i++)
3156           {
3157           if (eptr >= md->end_subject ||
3158              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3159             RRETURN(MATCH_NOMATCH);
3160           /* No need to skip more bytes - we know it's a 1-byte character */
3161           }
3162         break;
3163
3164         default:
3165         RRETURN(PCRE_ERROR_INTERNAL);
3166         }  /* End switch(ctype) */
3167
3168       else
3169 #endif     /* SUPPORT_UTF8 */
3170
3171       /* Code for the non-UTF-8 case for minimum matching of operators other
3172       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3173       number of bytes present, as this was tested above. */
3174
3175       switch(ctype)
3176         {
3177         case OP_ANY:
3178         for (i = 1; i <= min; i++)
3179           {
3180           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3181           eptr++;
3182           }
3183         break;
3184
3185         case OP_ALLANY:
3186         eptr += min;
3187         break;
3188
3189         case OP_ANYBYTE:
3190         eptr += min;
3191         break;
3192
3193         /* Because of the CRLF case, we can't assume the minimum number of
3194         bytes are present in this case. */
3195
3196         case OP_ANYNL:
3197         for (i = 1; i <= min; i++)
3198           {
3199           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3200           switch(*eptr++)
3201             {
3202             default: RRETURN(MATCH_NOMATCH);
3203             case 0x000d:
3204             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3205             break;
3206             case 0x000a:
3207             break;
3208
3209             case 0x000b:
3210             case 0x000c:
3211             case 0x0085:
3212             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3213             break;
3214             }
3215           }
3216         break;
3217
3218         case OP_NOT_HSPACE:
3219         for (i = 1; i <= min; i++)
3220           {
3221           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3222           switch(*eptr++)
3223             {
3224             default: break;
3225             case 0x09:      /* HT */
3226             case 0x20:      /* SPACE */
3227             case 0xa0:      /* NBSP */
3228             RRETURN(MATCH_NOMATCH);
3229             }
3230           }
3231         break;
3232
3233         case OP_HSPACE:
3234         for (i = 1; i <= min; i++)
3235           {
3236           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3237           switch(*eptr++)
3238             {
3239             default: RRETURN(MATCH_NOMATCH);
3240             case 0x09:      /* HT */
3241             case 0x20:      /* SPACE */
3242             case 0xa0:      /* NBSP */
3243             break;
3244             }
3245           }
3246         break;
3247
3248         case OP_NOT_VSPACE:
3249         for (i = 1; i <= min; i++)
3250           {
3251           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3252           switch(*eptr++)
3253             {
3254             default: break;
3255             case 0x0a:      /* LF */
3256             case 0x0b:      /* VT */
3257             case 0x0c:      /* FF */
3258             case 0x0d:      /* CR */
3259             case 0x85:      /* NEL */
3260             RRETURN(MATCH_NOMATCH);
3261             }
3262           }
3263         break;
3264
3265         case OP_VSPACE:
3266         for (i = 1; i <= min; i++)
3267           {
3268           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3269           switch(*eptr++)
3270             {
3271             default: RRETURN(MATCH_NOMATCH);
3272             case 0x0a:      /* LF */
3273             case 0x0b:      /* VT */
3274             case 0x0c:      /* FF */
3275             case 0x0d:      /* CR */
3276             case 0x85:      /* NEL */
3277             break;
3278             }
3279           }
3280         break;
3281
3282         case OP_NOT_DIGIT:
3283         for (i = 1; i <= min; i++)
3284           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3285         break;
3286
3287         case OP_DIGIT:
3288         for (i = 1; i <= min; i++)
3289           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3290         break;
3291
3292         case OP_NOT_WHITESPACE:
3293         for (i = 1; i <= min; i++)
3294           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3295         break;
3296
3297         case OP_WHITESPACE:
3298         for (i = 1; i <= min; i++)
3299           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3300         break;
3301
3302         case OP_NOT_WORDCHAR:
3303         for (i = 1; i <= min; i++)
3304           if ((md->ctypes[*eptr++] & ctype_word) != 0)
3305             RRETURN(MATCH_NOMATCH);
3306         break;
3307
3308         case OP_WORDCHAR:
3309         for (i = 1; i <= min; i++)
3310           if ((md->ctypes[*eptr++] & ctype_word) == 0)
3311             RRETURN(MATCH_NOMATCH);
3312         break;
3313
3314         default:
3315         RRETURN(PCRE_ERROR_INTERNAL);
3316         }
3317       }
3318
3319     /* If min = max, continue at the same level without recursing */
3320
3321     if (min == max) continue;
3322
3323     /* If minimizing, we have to test the rest of the pattern before each
3324     subsequent match. Again, separate the UTF-8 case for speed, and also
3325     separate the UCP cases. */
3326
3327     if (minimize)
3328       {
3329 #ifdef SUPPORT_UCP
3330       if (prop_type >= 0)
3331         {
3332         switch(prop_type)
3333           {
3334           case PT_ANY:
3335           for (fi = min;; fi++)
3336             {
3337             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3338             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3339             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3340             GETCHARINC(c, eptr);
3341             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3342             }
3343           /* Control never gets here */
3344
3345           case PT_LAMP:
3346           for (fi = min;; fi++)
3347             {
3348             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3349             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3350             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3351             GETCHARINC(c, eptr);
3352             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3353             if ((prop_chartype == ucp_Lu ||
3354                  prop_chartype == ucp_Ll ||
3355                  prop_chartype == ucp_Lt) == prop_fail_result)
3356               RRETURN(MATCH_NOMATCH);
3357             }
3358           /* Control never gets here */
3359
3360           case PT_GC:
3361           for (fi = min;; fi++)
3362             {
3363             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3364             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3365             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3366             GETCHARINC(c, eptr);
3367             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3368             if ((prop_category == prop_value) == prop_fail_result)
3369               RRETURN(MATCH_NOMATCH);
3370             }
3371           /* Control never gets here */
3372
3373           case PT_PC:
3374           for (fi = min;; fi++)
3375             {
3376             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3377             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3378             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3379             GETCHARINC(c, eptr);
3380             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3381             if ((prop_chartype == prop_value) == prop_fail_result)
3382               RRETURN(MATCH_NOMATCH);
3383             }
3384           /* Control never gets here */
3385
3386           case PT_SC:
3387           for (fi = min;; fi++)
3388             {
3389             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3390             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3391             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3392             GETCHARINC(c, eptr);
3393             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3394             if ((prop_script == prop_value) == prop_fail_result)
3395               RRETURN(MATCH_NOMATCH);
3396             }
3397           /* Control never gets here */
3398
3399           default:
3400           RRETURN(PCRE_ERROR_INTERNAL);
3401           }
3402         }
3403
3404       /* Match extended Unicode sequences. We will get here only if the
3405       support is in the binary; otherwise a compile-time error occurs. */
3406
3407       else if (ctype == OP_EXTUNI)
3408         {
3409         for (fi = min;; fi++)
3410           {
3411           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3412           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3413           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3414           GETCHARINCTEST(c, eptr);
3415           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3416           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3417           while (eptr < md->end_subject)
3418             {
3419             int len = 1;
3420             if (!utf8) c = *eptr; else
3421               {
3422               GETCHARLEN(c, eptr, len);
3423               }
3424             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3425             if (prop_category != ucp_M) break;
3426             eptr += len;
3427             }
3428           }
3429         }
3430
3431       else
3432 #endif     /* SUPPORT_UCP */
3433
3434 #ifdef SUPPORT_UTF8
3435       /* UTF-8 mode */
3436       if (utf8)
3437         {
3438         for (fi = min;; fi++)
3439           {
3440           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3441           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3442           if (fi >= max || eptr >= md->end_subject ||
3443                (ctype == OP_ANY && IS_NEWLINE(eptr)))
3444             RRETURN(MATCH_NOMATCH);
3445
3446           GETCHARINC(c, eptr);
3447           switch(ctype)
3448             {
3449             case OP_ANY:        /* This is the non-NL case */
3450             case OP_ALLANY:
3451             case OP_ANYBYTE:
3452             break;
3453
3454             case OP_ANYNL:
3455             switch(c)
3456               {
3457               default: RRETURN(MATCH_NOMATCH);
3458               case 0x000d:
3459               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3460               break;
3461               case 0x000a:
3462               break;
3463
3464               case 0x000b:
3465               case 0x000c:
3466               case 0x0085:
3467               case 0x2028:
3468               case 0x2029:
3469               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3470               break;
3471               }
3472             break;
3473
3474             case OP_NOT_HSPACE:
3475             switch(c)
3476               {
3477               default: break;
3478               case 0x09:      /* HT */
3479               case 0x20:      /* SPACE */
3480               case 0xa0:      /* NBSP */
3481               case 0x1680:    /* OGHAM SPACE MARK */
3482               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3483               case 0x2000:    /* EN QUAD */
3484               case 0x2001:    /* EM QUAD */
3485               case 0x2002:    /* EN SPACE */
3486               case 0x2003:    /* EM SPACE */
3487               case 0x2004:    /* THREE-PER-EM SPACE */
3488               case 0x2005:    /* FOUR-PER-EM SPACE */
3489               case 0x2006:    /* SIX-PER-EM SPACE */
3490               case 0x2007:    /* FIGURE SPACE */
3491               case 0x2008:    /* PUNCTUATION SPACE */
3492               case 0x2009:    /* THIN SPACE */
3493               case 0x200A:    /* HAIR SPACE */
3494               case 0x202f:    /* NARROW NO-BREAK SPACE */
3495               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3496               case 0x3000:    /* IDEOGRAPHIC SPACE */
3497               RRETURN(MATCH_NOMATCH);
3498               }
3499             break;
3500
3501             case OP_HSPACE:
3502             switch(c)
3503               {
3504               default: RRETURN(MATCH_NOMATCH);
3505               case 0x09:      /* HT */
3506               case 0x20:      /* SPACE */
3507               case 0xa0:      /* NBSP */
3508               case 0x1680:    /* OGHAM SPACE MARK */
3509               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3510               case 0x2000:    /* EN QUAD */
3511               case 0x2001:    /* EM QUAD */
3512               case 0x2002:    /* EN SPACE */
3513               case 0x2003:    /* EM SPACE */
3514               case 0x2004:    /* THREE-PER-EM SPACE */
3515               case 0x2005:    /* FOUR-PER-EM SPACE */
3516               case 0x2006:    /* SIX-PER-EM SPACE */
3517               case 0x2007:    /* FIGURE SPACE */
3518               case 0x2008:    /* PUNCTUATION SPACE */
3519               case 0x2009:    /* THIN SPACE */
3520               case 0x200A:    /* HAIR SPACE */
3521               case 0x202f:    /* NARROW NO-BREAK SPACE */
3522               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3523               case 0x3000:    /* IDEOGRAPHIC SPACE */
3524               break;
3525               }
3526             break;
3527
3528             case OP_NOT_VSPACE:
3529             switch(c)
3530               {
3531               default: break;
3532               case 0x0a:      /* LF */
3533               case 0x0b:      /* VT */
3534               case 0x0c:      /* FF */
3535               case 0x0d:      /* CR */
3536               case 0x85:      /* NEL */
3537               case 0x2028:    /* LINE SEPARATOR */
3538               case 0x2029:    /* PARAGRAPH SEPARATOR */
3539               RRETURN(MATCH_NOMATCH);
3540               }
3541             break;
3542
3543             case OP_VSPACE:
3544             switch(c)
3545               {
3546               default: RRETURN(MATCH_NOMATCH);
3547               case 0x0a:      /* LF */
3548               case 0x0b:      /* VT */
3549               case 0x0c:      /* FF */
3550               case 0x0d:      /* CR */
3551               case 0x85:      /* NEL */
3552               case 0x2028:    /* LINE SEPARATOR */
3553               case 0x2029:    /* PARAGRAPH SEPARATOR */
3554               break;
3555               }
3556             break;
3557
3558             case OP_NOT_DIGIT:
3559             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3560               RRETURN(MATCH_NOMATCH);
3561             break;
3562
3563             case OP_DIGIT:
3564             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3565               RRETURN(MATCH_NOMATCH);
3566             break;
3567
3568             case OP_NOT_WHITESPACE:
3569             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3570               RRETURN(MATCH_NOMATCH);
3571             break;
3572
3573             case OP_WHITESPACE:
3574             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3575               RRETURN(MATCH_NOMATCH);
3576             break;
3577
3578             case OP_NOT_WORDCHAR:
3579             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3580               RRETURN(MATCH_NOMATCH);
3581             break;
3582
3583             case OP_WORDCHAR:
3584             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3585               RRETURN(MATCH_NOMATCH);
3586             break;
3587
3588             default:
3589             RRETURN(PCRE_ERROR_INTERNAL);
3590             }
3591           }
3592         }
3593       else
3594 #endif
3595       /* Not UTF-8 mode */
3596         {
3597         for (fi = min;; fi++)
3598           {
3599           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3600           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3601           if (fi >= max || eptr >= md->end_subject ||
3602                (ctype == OP_ANY && IS_NEWLINE(eptr)))
3603             RRETURN(MATCH_NOMATCH);
3604
3605           c = *eptr++;
3606           switch(ctype)
3607             {
3608             case OP_ANY:     /* This is the non-NL case */
3609             case OP_ALLANY:
3610             case OP_ANYBYTE:
3611             break;
3612
3613             case OP_ANYNL:
3614             switch(c)
3615               {
3616               default: RRETURN(MATCH_NOMATCH);
3617               case 0x000d:
3618               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3619               break;
3620
3621               case 0x000a:
3622               break;
3623
3624               case 0x000b:
3625               case 0x000c:
3626               case 0x0085:
3627               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3628               break;
3629               }
3630             break;
3631
3632             case OP_NOT_HSPACE:
3633             switch(c)
3634               {
3635               default: break;
3636               case 0x09:      /* HT */
3637               case 0x20:      /* SPACE */
3638               case 0xa0:      /* NBSP */
3639               RRETURN(MATCH_NOMATCH);
3640               }
3641             break;
3642
3643             case OP_HSPACE:
3644             switch(c)
3645               {
3646               default: RRETURN(MATCH_NOMATCH);
3647               case 0x09:      /* HT */
3648               case 0x20:      /* SPACE */
3649               case 0xa0:      /* NBSP */
3650               break;
3651               }
3652             break;
3653
3654             case OP_NOT_VSPACE:
3655             switch(c)
3656               {
3657               default: break;
3658               case 0x0a:      /* LF */
3659               case 0x0b:      /* VT */
3660               case 0x0c:      /* FF */
3661               case 0x0d:      /* CR */
3662               case 0x85:      /* NEL */
3663               RRETURN(MATCH_NOMATCH);
3664               }
3665             break;
3666
3667             case OP_VSPACE:
3668             switch(c)
3669               {
3670               default: RRETURN(MATCH_NOMATCH);
3671               case 0x0a:      /* LF */
3672               case 0x0b:      /* VT */
3673               case 0x0c:      /* FF */
3674               case 0x0d:      /* CR */
3675               case 0x85:      /* NEL */
3676               break;
3677               }
3678             break;
3679
3680             case OP_NOT_DIGIT:
3681             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3682             break;
3683
3684             case OP_DIGIT:
3685             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3686             break;
3687
3688             case OP_NOT_WHITESPACE:
3689             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3690             break;
3691
3692             case OP_WHITESPACE:
3693             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3694             break;
3695
3696             case OP_NOT_WORDCHAR:
3697             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3698             break;
3699
3700             case OP_WORDCHAR:
3701             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3702             break;
3703
3704             default:
3705             RRETURN(PCRE_ERROR_INTERNAL);
3706             }
3707           }
3708         }
3709       /* Control never gets here */
3710       }
3711
3712     /* If maximizing, it is worth using inline code for speed, doing the type
3713     test once at the start (i.e. keep it out of the loop). Again, keep the
3714     UTF-8 and UCP stuff separate. */
3715
3716     else
3717       {
3718       pp = eptr;  /* Remember where we started */
3719
3720 #ifdef SUPPORT_UCP
3721       if (prop_type >= 0)
3722         {
3723         switch(prop_type)
3724           {
3725           case PT_ANY:
3726           for (i = min; i < max; i++)
3727             {
3728             int len = 1;
3729             if (eptr >= md->end_subject) break;
3730             GETCHARLEN(c, eptr, len);
3731             if (prop_fail_result) break;
3732             eptr+= len;
3733             }
3734           break;
3735
3736           case PT_LAMP:
3737           for (i = min; i < max; i++)
3738             {
3739             int len = 1;
3740             if (eptr >= md->end_subject) break;
3741             GETCHARLEN(c, eptr, len);
3742             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3743             if ((prop_chartype == ucp_Lu ||
3744                  prop_chartype == ucp_Ll ||
3745                  prop_chartype == ucp_Lt) == prop_fail_result)
3746               break;
3747             eptr+= len;
3748             }
3749           break;
3750
3751           case PT_GC:
3752           for (i = min; i < max; i++)
3753             {
3754             int len = 1;
3755             if (eptr >= md->end_subject) break;
3756             GETCHARLEN(c, eptr, len);
3757             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3758             if ((prop_category == prop_value) == prop_fail_result)
3759               break;
3760             eptr+= len;
3761             }
3762           break;
3763
3764           case PT_PC:
3765           for (i = min; i < max; i++)
3766             {
3767             int len = 1;
3768             if (eptr >= md->end_subject) break;
3769             GETCHARLEN(c, eptr, len);
3770             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3771             if ((prop_chartype == prop_value) == prop_fail_result)
3772               break;
3773             eptr+= len;
3774             }
3775           break;
3776
3777           case PT_SC:
3778           for (i = min; i < max; i++)
3779             {
3780             int len = 1;
3781             if (eptr >= md->end_subject) break;
3782             GETCHARLEN(c, eptr, len);
3783             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3784             if ((prop_script == prop_value) == prop_fail_result)
3785               break;
3786             eptr+= len;
3787             }
3788           break;
3789           }
3790
3791         /* eptr is now past the end of the maximum run */
3792
3793         if (possessive) continue;
3794         for(;;)
3795           {
3796           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3797           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3798           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3799           if (utf8) BACKCHAR(eptr);
3800           }
3801         }
3802
3803       /* Match extended Unicode sequences. We will get here only if the
3804       support is in the binary; otherwise a compile-time error occurs. */
3805
3806       else if (ctype == OP_EXTUNI)
3807         {
3808         for (i = min; i < max; i++)
3809           {
3810           if (eptr >= md->end_subject) break;
3811           GETCHARINCTEST(c, eptr);
3812           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3813           if (prop_category == ucp_M) break;
3814           while (eptr < md->end_subject)
3815             {
3816             int len = 1;
3817             if (!utf8) c = *eptr; else
3818               {
3819               GETCHARLEN(c, eptr, len);
3820               }
3821             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3822             if (prop_category != ucp_M) break;
3823             eptr += len;
3824             }
3825           }
3826
3827         /* eptr is now past the end of the maximum run */
3828
3829         if (possessive) continue;
3830         for(;;)
3831           {
3832           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3833           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3834           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3835           for (;;)                        /* Move back over one extended */
3836             {
3837             int len = 1;
3838             if (!utf8) c = *eptr; else
3839               {
3840               BACKCHAR(eptr);
3841               GETCHARLEN(c, eptr, len);
3842               }
3843             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3844             if (prop_category != ucp_M) break;
3845             eptr--;
3846             }
3847           }
3848         }
3849
3850       else
3851 #endif   /* SUPPORT_UCP */
3852
3853 #ifdef SUPPORT_UTF8
3854       /* UTF-8 mode */
3855
3856       if (utf8)
3857         {
3858         switch(ctype)
3859           {
3860           case OP_ANY:
3861           if (max < INT_MAX)
3862             {
3863             for (i = min; i < max; i++)
3864               {
3865               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3866               eptr++;
3867               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3868               }
3869             }
3870
3871           /* Handle unlimited UTF-8 repeat */
3872
3873           else
3874             {
3875             for (i = min; i < max; i++)
3876               {
3877               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3878               eptr++;
3879               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3880               }
3881             }
3882           break;
3883
3884           case OP_ALLANY:
3885           if (max < INT_MAX)
3886             {
3887             for (i = min; i < max; i++)
3888               {
3889               if (eptr >= md->end_subject) break;
3890               eptr++;
3891               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3892               }
3893             }
3894           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3895           break;
3896
3897           /* The byte case is the same as non-UTF8 */
3898
3899           case OP_ANYBYTE:
3900           c = max - min;
3901           if (c > (unsigned int)(md->end_subject - eptr))
3902             c = md->end_subject - eptr;
3903           eptr += c;
3904           break;
3905
3906           case OP_ANYNL:
3907           for (i = min; i < max; i++)
3908             {
3909             int len = 1;
3910             if (eptr >= md->end_subject) break;
3911             GETCHARLEN(c, eptr, len);
3912             if (c == 0x000d)
3913               {
3914               if (++eptr >= md->end_subject) break;
3915               if (*eptr == 0x000a) eptr++;
3916               }
3917             else
3918               {
3919               if (c != 0x000a &&
3920                   (md->bsr_anycrlf ||
3921                    (c != 0x000b && c != 0x000c &&
3922                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
3923                 break;
3924               eptr += len;
3925               }
3926             }
3927           break;
3928
3929           case OP_NOT_HSPACE:
3930           case OP_HSPACE:
3931           for (i = min; i < max; i++)
3932             {
3933             BOOL gotspace;
3934             int len = 1;
3935             if (eptr >= md->end_subject) break;
3936             GETCHARLEN(c, eptr, len);
3937             switch(c)
3938               {
3939               default: gotspace = FALSE; break;
3940               case 0x09:      /* HT */
3941               case 0x20:      /* SPACE */
3942               case 0xa0:      /* NBSP */
3943               case 0x1680:    /* OGHAM SPACE MARK */
3944               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3945               case 0x2000:    /* EN QUAD */
3946               case 0x2001:    /* EM QUAD */
3947               case 0x2002:    /* EN SPACE */
3948               case 0x2003:    /* EM SPACE */
3949               case 0x2004:    /* THREE-PER-EM SPACE */
3950               case 0x2005:    /* FOUR-PER-EM SPACE */
3951               case 0x2006:    /* SIX-PER-EM SPACE */
3952               case 0x2007:    /* FIGURE SPACE */
3953               case 0x2008:    /* PUNCTUATION SPACE */
3954               case 0x2009:    /* THIN SPACE */
3955               case 0x200A:    /* HAIR SPACE */
3956               case 0x202f:    /* NARROW NO-BREAK SPACE */
3957               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3958               case 0x3000:    /* IDEOGRAPHIC SPACE */
3959               gotspace = TRUE;
3960               break;
3961               }
3962             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3963             eptr += len;
3964             }
3965           break;
3966
3967           case OP_NOT_VSPACE:
3968           case OP_VSPACE:
3969           for (i = min; i < max; i++)
3970             {
3971             BOOL gotspace;
3972             int len = 1;
3973             if (eptr >= md->end_subject) break;
3974             GETCHARLEN(c, eptr, len);
3975             switch(c)
3976               {
3977               default: gotspace = FALSE; break;
3978               case 0x0a:      /* LF */
3979               case 0x0b:      /* VT */
3980               case 0x0c:      /* FF */
3981               case 0x0d:      /* CR */
3982               case 0x85:      /* NEL */
3983               case 0x2028:    /* LINE SEPARATOR */
3984               case 0x2029:    /* PARAGRAPH SEPARATOR */
3985               gotspace = TRUE;
3986               break;
3987               }
3988             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3989             eptr += len;
3990             }
3991           break;
3992
3993           case OP_NOT_DIGIT:
3994           for (i = min; i < max; i++)
3995             {
3996             int len = 1;
3997             if (eptr >= md->end_subject) break;
3998             GETCHARLEN(c, eptr, len);
3999             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
4000             eptr+= len;
4001             }
4002           break;
4003
4004           case OP_DIGIT:
4005           for (i = min; i < max; i++)
4006             {
4007             int len = 1;
4008             if (eptr >= md->end_subject) break;
4009             GETCHARLEN(c, eptr, len);
4010             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
4011             eptr+= len;
4012             }
4013           break;
4014
4015           case OP_NOT_WHITESPACE:
4016           for (i = min; i < max; i++)
4017             {
4018             int len = 1;
4019             if (eptr >= md->end_subject) break;
4020             GETCHARLEN(c, eptr, len);
4021             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4022             eptr+= len;
4023             }
4024           break;
4025
4026           case OP_WHITESPACE:
4027           for (i = min; i < max; i++)
4028             {
4029             int len = 1;
4030             if (eptr >= md->end_subject) break;
4031             GETCHARLEN(c, eptr, len);
4032             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
4033             eptr+= len;
4034             }
4035           break;
4036
4037           case OP_NOT_WORDCHAR:
4038           for (i = min; i < max; i++)
4039             {
4040             int len = 1;
4041             if (eptr >= md->end_subject) break;
4042             GETCHARLEN(c, eptr, len);
4043             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4044             eptr+= len;
4045             }
4046           break;
4047
4048           case OP_WORDCHAR:
4049           for (i = min; i < max; i++)
4050             {
4051             int len = 1;
4052             if (eptr >= md->end_subject) break;
4053             GETCHARLEN(c, eptr, len);
4054             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
4055             eptr+= len;
4056             }
4057           break;
4058
4059           default:
4060           RRETURN(PCRE_ERROR_INTERNAL);
4061           }
4062
4063         /* eptr is now past the end of the maximum run */
4064
4065         if (possessive) continue;
4066         for(;;)
4067           {
4068           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4069           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4070           if (eptr-- == pp) break;        /* Stop if tried at original pos */
4071           BACKCHAR(eptr);
4072           }
4073         }
4074       else
4075 #endif  /* SUPPORT_UTF8 */
4076
4077       /* Not UTF-8 mode */
4078         {
4079         switch(ctype)
4080           {
4081           case OP_ANY:
4082           for (i = min; i < max; i++)
4083             {
4084             if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4085             eptr++;
4086             }
4087           break;
4088
4089           case OP_ALLANY:
4090           case OP_ANYBYTE:
4091           c = max - min;
4092           if (c > (unsigned int)(md->end_subject - eptr))
4093             c = md->end_subject - eptr;
4094           eptr += c;
4095           break;
4096
4097           case OP_ANYNL:
4098           for (i = min; i < max; i++)
4099             {
4100             if (eptr >= md->end_subject) break;
4101             c = *eptr;
4102             if (c == 0x000d)
4103               {
4104               if (++eptr >= md->end_subject) break;
4105               if (*eptr == 0x000a) eptr++;
4106               }
4107             else
4108               {
4109               if (c != 0x000a &&
4110                   (md->bsr_anycrlf ||
4111                     (c != 0x000b && c != 0x000c && c != 0x0085)))
4112                 break;
4113               eptr++;
4114               }
4115             }
4116           break;
4117
4118           case OP_NOT_HSPACE:
4119           for (i = min; i < max; i++)
4120             {
4121             if (eptr >= md->end_subject) break;
4122             c = *eptr;
4123             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4124             eptr++;
4125             }
4126           break;
4127
4128           case OP_HSPACE:
4129           for (i = min; i < max; i++)
4130             {
4131             if (eptr >= md->end_subject) break;
4132             c = *eptr;
4133             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4134             eptr++;
4135             }
4136           break;
4137
4138           case OP_NOT_VSPACE:
4139           for (i = min; i < max; i++)
4140             {
4141             if (eptr >= md->end_subject) break;
4142             c = *eptr;
4143             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4144               break;
4145             eptr++;
4146             }
4147           break;
4148
4149           case OP_VSPACE:
4150           for (i = min; i < max; i++)
4151             {
4152             if (eptr >= md->end_subject) break;
4153             c = *eptr;
4154             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4155               break;
4156             eptr++;
4157             }
4158           break;
4159
4160           case OP_NOT_DIGIT:
4161           for (i = min; i < max; i++)
4162             {
4163             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4164               break;
4165             eptr++;
4166             }
4167           break;
4168
4169           case OP_DIGIT:
4170           for (i = min; i < max; i++)
4171             {
4172             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4173               break;
4174             eptr++;
4175             }
4176           break;
4177
4178           case OP_NOT_WHITESPACE:
4179           for (i = min; i < max; i++)
4180             {
4181             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4182               break;
4183             eptr++;
4184             }
4185           break;
4186
4187           case OP_WHITESPACE:
4188           for (i = min; i < max; i++)
4189             {
4190             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4191               break;
4192             eptr++;
4193             }
4194           break;
4195
4196           case OP_NOT_WORDCHAR:
4197           for (i = min; i < max; i++)
4198             {
4199             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4200               break;
4201             eptr++;
4202             }
4203           break;
4204
4205           case OP_WORDCHAR:
4206           for (i = min; i < max; i++)
4207             {
4208             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4209               break;
4210             eptr++;
4211             }
4212           break;
4213
4214           default:
4215           RRETURN(PCRE_ERROR_INTERNAL);
4216           }
4217
4218         /* eptr is now past the end of the maximum run */
4219
4220         if (possessive) continue;
4221         while (eptr >= pp)
4222           {
4223           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4224           eptr--;
4225           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4226           }
4227         }
4228
4229       /* Get here if we can't make it match with any permitted repetitions */
4230
4231       RRETURN(MATCH_NOMATCH);
4232       }
4233     /* Control never gets here */
4234
4235     /* There's been some horrible disaster. Arrival here can only mean there is
4236     something seriously wrong in the code above or the OP_xxx definitions. */
4237
4238     default:
4239     DPRINTF(("Unknown opcode %d\n", *ecode));
4240     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4241     }
4242
4243   /* Do not stick any code in here without much thought; it is assumed
4244   that "continue" in the code above comes out to here to repeat the main
4245   loop. */
4246
4247   }             /* End of main loop */
4248 /* Control never reaches here */
4249
4250
4251 /* When compiling to use the heap rather than the stack for recursive calls to
4252 match(), the RRETURN() macro jumps here. The number that is saved in
4253 frame->Xwhere indicates which label we actually want to return to. */
4254
4255 #ifdef NO_RECURSE
4256 #define LBL(val) case val: goto L_RM##val;
4257 HEAP_RETURN:
4258 switch (frame->Xwhere)
4259   {
4260   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4261   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4262   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4263   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4264   LBL(53) LBL(54)
4265 #ifdef SUPPORT_UTF8
4266   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4267   LBL(32) LBL(34) LBL(42) LBL(46)
4268 #ifdef SUPPORT_UCP
4269   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4270 #endif  /* SUPPORT_UCP */
4271 #endif  /* SUPPORT_UTF8 */
4272   default:
4273   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4274   return PCRE_ERROR_INTERNAL;
4275   }
4276 #undef LBL
4277 #endif  /* NO_RECURSE */
4278 }
4279
4280
4281 /***************************************************************************
4282 ****************************************************************************
4283                    RECURSION IN THE match() FUNCTION
4284
4285 Undefine all the macros that were defined above to handle this. */
4286
4287 #ifdef NO_RECURSE   /* heap (not stack) recursion build: drop the macros set up for match() */
4288 #undef eptr         /* this group: NOTE(review) presumably match()'s argument names -- confirm */
4289 #undef ecode
4290 #undef mstart
4291 #undef offset_top
4292 #undef ims
4293 #undef eptrb
4294 #undef flags
4295 /* NOTE(review): the groups below look like match()'s working variables (pp, ctype, min, max are used that way above); the #defines are not in view -- verify. */
4296 #undef callpat
4297 #undef charptr
4298 #undef data
4299 #undef next
4300 #undef pp
4301 #undef prev
4302 #undef saved_eptr
4303
4304 #undef new_recursive
4305
4306 #undef cur_is_word
4307 #undef condition
4308 #undef prev_is_word
4309
4310 #undef original_ims
4311
4312 #undef ctype
4313 #undef length
4314 #undef max
4315 #undef min
4316 #undef number
4317 #undef offset
4318 #undef op
4319 #undef save_capture_last
4320 #undef save_offset1
4321 #undef save_offset2
4322 #undef save_offset3
4323 #undef stacksave
4324
4325 #undef newptrb
4326
4327 #endif  /* NO_RECURSE */
4328
4329 /* These two are defined as macros in both cases (with and without NO_RECURSE), so they are undefined unconditionally, outside the #ifdef. */
4330
4331 #undef fc
4332 #undef fi
4333
4334 /***************************************************************************
4335 ***************************************************************************/
4336
4337
4338
4339 /*************************************************
4340 *         Execute a Regular Expression           *
4341 *************************************************/
4342
4343 /* This function applies a compiled re to a subject string and picks out
4344 portions of the string if it matches. Two elements in the vector are set for
4345 each substring: the offsets to the start and end of the substring.
4346
4347 Arguments:
4348   argument_re     points to the compiled expression
4349   extra_data      points to extra data or is NULL
4350   subject         points to the subject string
4351   length          length of subject string (may contain binary zeros)
4352   start_offset    where to start in the subject string
4353   options         option bits
4354   offsets         points to a vector of ints to be filled in with offsets
4355   offsetcount     the number of elements in the vector
4356
4357 Returns:          > 0 => success; value is the number of elements filled in
4358                   = 0 => success, but offsets is not big enough
4359                    -1 => failed to match
4360                  < -1 => some kind of unexpected problem
4361 */
4362
4363 PCRE_EXP_DEFN int
4364 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4365   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4366   int offsetcount)
4367 {
4368 int rc, resetcount, ocount;
4369 int first_byte = -1;
4370 int req_byte = -1;
4371 int req_byte2 = -1;
4372 int newline;
4373 unsigned long int ims;
4374 BOOL using_temporary_offsets = FALSE;
4375 BOOL anchored;
4376 BOOL startline;
4377 BOOL firstline;
4378 BOOL first_byte_caseless = FALSE;
4379 BOOL req_byte_caseless = FALSE;
4380 BOOL utf8;
4381 match_data match_block;
4382 match_data *md = &match_block;
4383 const uschar *tables;
4384 const uschar *start_bits = NULL;
4385 USPTR start_match = (USPTR)subject + start_offset;
4386 USPTR end_subject;
4387 USPTR req_byte_ptr = start_match - 1;
4388
4389 pcre_study_data internal_study;
4390 const pcre_study_data *study;
4391
4392 real_pcre internal_re;
4393 const real_pcre *external_re = (const real_pcre *)argument_re;
4394 const real_pcre *re = external_re;
4395
4396 /* Plausibility checks */
4397
4398 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4399 if (re == NULL || subject == NULL ||
4400    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4401 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4402
4403 /* Fish out the optional data from the extra_data structure, first setting
4404 the default values. */
4405
4406 study = NULL;
4407 md->match_limit = MATCH_LIMIT;
4408 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4409 md->callout_data = NULL;
4410
4411 /* The table pointer is always in native byte order. */
4412
4413 tables = external_re->tables;
4414
4415 if (extra_data != NULL)
4416   {
4417   register unsigned int flags = extra_data->flags;
4418   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4419     study = (const pcre_study_data *)extra_data->study_data;
4420   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4421     md->match_limit = extra_data->match_limit;
4422   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4423     md->match_limit_recursion = extra_data->match_limit_recursion;
4424   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4425     md->callout_data = extra_data->callout_data;
4426   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4427   }
4428
4429 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
4430 is a feature that makes it possible to save compiled regex and re-use them
4431 in other programs later. */
4432
4433 if (tables == NULL) tables = _pcre_default_tables;
4434
4435 /* Check that the first field in the block is the magic number. If it is not,
4436 test for a regex that was compiled on a host of opposite endianness. If this is
4437 the case, flipped values are put in internal_re and internal_study if there was
4438 study data too. */
4439
4440 if (re->magic_number != MAGIC_NUMBER)
4441   {
4442   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4443   if (re == NULL) return PCRE_ERROR_BADMAGIC;
4444   if (study != NULL) study = &internal_study;
4445   }
4446
4447 /* Set up other data */
4448
4449 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4450 startline = (re->flags & PCRE_STARTLINE) != 0;
4451 firstline = (re->options & PCRE_FIRSTLINE) != 0;
4452
4453 /* The code starts after the real_pcre block and the capture name table. */
4454
4455 md->start_code = (const uschar *)external_re + re->name_table_offset +
4456   re->name_count * re->name_entry_size;
4457
4458 md->start_subject = (USPTR)subject;
4459 md->start_offset = start_offset;
4460 md->end_subject = md->start_subject + length;
4461 end_subject = md->end_subject;
4462
4463 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4464 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4465 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4466
4467 md->notbol = (options & PCRE_NOTBOL) != 0;
4468 md->noteol = (options & PCRE_NOTEOL) != 0;
4469 md->notempty = (options & PCRE_NOTEMPTY) != 0;
4470 md->partial = (options & PCRE_PARTIAL) != 0;
4471 md->hitend = FALSE;
4472
4473 md->recursive = NULL;                   /* No recursion at top level */
4474
4475 md->lcc = tables + lcc_offset;
4476 md->ctypes = tables + ctypes_offset;
4477
4478 /* Handle different \R options. */
4479
4480 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
4481   {
4482   case 0:
4483   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
4484     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4485   else
4486 #ifdef BSR_ANYCRLF
4487   md->bsr_anycrlf = TRUE;
4488 #else
4489   md->bsr_anycrlf = FALSE;
4490 #endif
4491   break;
4492
4493   case PCRE_BSR_ANYCRLF:
4494   md->bsr_anycrlf = TRUE;
4495   break;
4496
4497   case PCRE_BSR_UNICODE:
4498   md->bsr_anycrlf = FALSE;
4499   break;
4500
4501   default: return PCRE_ERROR_BADNEWLINE;
4502   }
4503
4504 /* Handle different types of newline. The three bits give eight cases. If
4505 nothing is set at run time, whatever was used at compile time applies. */
4506
4507 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4508         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4509   {
4510   case 0: newline = NEWLINE; break;   /* Compile-time default */
4511   case PCRE_NEWLINE_CR: newline = '\r'; break;
4512   case PCRE_NEWLINE_LF: newline = '\n'; break;
4513   case PCRE_NEWLINE_CR+
4514        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4515   case PCRE_NEWLINE_ANY: newline = -1; break;
4516   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4517   default: return PCRE_ERROR_BADNEWLINE;
4518   }
4519
4520 if (newline == -2)
4521   {
4522   md->nltype = NLTYPE_ANYCRLF;
4523   }
4524 else if (newline < 0)
4525   {
4526   md->nltype = NLTYPE_ANY;
4527   }
4528 else
4529   {
4530   md->nltype = NLTYPE_FIXED;
4531   if (newline > 255)
4532     {
4533     md->nllen = 2;
4534     md->nl[0] = (newline >> 8) & 255;
4535     md->nl[1] = newline & 255;
4536     }
4537   else
4538     {
4539     md->nllen = 1;
4540     md->nl[0] = newline;
4541     }
4542   }
4543
4544 /* Partial matching is supported only for a restricted set of regexes at the
4545 moment. */
4546
4547 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4548   return PCRE_ERROR_BADPARTIAL;
4549
4550 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4551 back the character offset. */
4552
4553 #ifdef SUPPORT_UTF8
4554 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4555   {
4556   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4557     return PCRE_ERROR_BADUTF8;
4558   if (start_offset > 0 && start_offset < length)
4559     {
4560     int tb = ((uschar *)subject)[start_offset];
4561     if (tb > 127)
4562       {
4563       tb &= 0xc0;
4564       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4565       }
4566     }
4567   }
4568 #endif
4569
4570 /* The ims options can vary during the matching as a result of the presence
4571 of (?ims) items in the pattern. They are kept in a local variable so that
4572 restoring at the exit of a group is easy. */
4573
4574 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4575
4576 /* If the expression has got more back references than the offsets supplied can
4577 hold, we get a temporary chunk of working store to use during the matching.
4578 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4579 of 3. */
4580
4581 ocount = offsetcount - (offsetcount % 3);
4582
4583 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4584   {
4585   ocount = re->top_backref * 3 + 3;
4586   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4587   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4588   using_temporary_offsets = TRUE;
4589   DPRINTF(("Got memory to hold back references\n"));
4590   }
4591 else md->offset_vector = offsets;
4592
4593 md->offset_end = ocount;
4594 md->offset_max = (2*ocount)/3;
4595 md->offset_overflow = FALSE;
4596 md->capture_last = -1;
4597
4598 /* Compute the minimum number of offsets that we need to reset each time. Doing
4599 this makes a huge difference to execution time when there aren't many brackets
4600 in the pattern. */
4601
4602 resetcount = 2 + re->top_bracket * 2;
4603 if (resetcount > offsetcount) resetcount = ocount;
4604
4605 /* Reset the working variable associated with each extraction. These should
4606 never be used unless previously set, but they get saved and restored, and so we
4607 initialize them to avoid reading uninitialized locations. */
4608
4609 if (md->offset_vector != NULL)
4610   {
4611   register int *iptr = md->offset_vector + ocount;
4612   register int *iend = iptr - resetcount/2 + 1;
4613   while (--iptr >= iend) *iptr = -1;
4614   }
4615
4616 /* Set up the first character to match, if available. The first_byte value is
4617 never set for an anchored regular expression, but the anchoring may be forced
4618 at run time, so we have to test for anchoring. The first char may be unset for
4619 an unanchored pattern, of course. If there's no first char and the pattern was
4620 studied, there may be a bitmap of possible first characters. */
4621
4622 if (!anchored)
4623   {
4624   if ((re->flags & PCRE_FIRSTSET) != 0)
4625     {
4626     first_byte = re->first_byte & 255;
4627     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4628       first_byte = md->lcc[first_byte];
4629     }
4630   else
4631     if (!startline && study != NULL &&
4632       (study->options & PCRE_STUDY_MAPPED) != 0)
4633         start_bits = study->start_bits;
4634   }
4635
4636 /* For anchored or unanchored matches, there may be a "last known required
4637 character" set. */
4638
4639 if ((re->flags & PCRE_REQCHSET) != 0)
4640   {
4641   req_byte = re->req_byte & 255;
4642   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4643   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4644   }
4645
4646
4647 /* ==========================================================================*/
4648
4649 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4650 the loop runs just once. */
4651
4652 for(;;)
4653   {
4654   USPTR save_end_subject = end_subject;
4655   USPTR new_start_match;
4656
4657   /* Reset the maximum number of extractions we might see. */
4658
4659   if (md->offset_vector != NULL)
4660     {
4661     register int *iptr = md->offset_vector;
4662     register int *iend = iptr + resetcount;
4663     while (iptr < iend) *iptr++ = -1;
4664     }
4665
4666   /* Advance to a unique first char if possible. If firstline is TRUE, the
4667   start of the match is constrained to the first line of a multiline string.
4668   That is, the match must be before or at the first newline. Implement this by
4669   temporarily adjusting end_subject so that we stop scanning at a newline. If
4670   the match fails at the newline, later code breaks this loop. */
4671
4672   if (firstline)
4673     {
4674     USPTR t = start_match;
4675     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4676     end_subject = t;
4677     }
4678
4679   /* Now test for a unique first byte */
4680
4681   if (first_byte >= 0)
4682     {
4683     if (first_byte_caseless)
4684       while (start_match < end_subject &&
4685              md->lcc[*start_match] != first_byte)
4686         { NEXTCHAR(start_match); }
4687     else
4688       while (start_match < end_subject && *start_match != first_byte)
4689         { NEXTCHAR(start_match); }
4690     }
4691
4692   /* Or to just after a linebreak for a multiline match if possible */
4693
4694   else if (startline)
4695     {
4696     if (start_match > md->start_subject + start_offset)
4697       {
4698       while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4699         { NEXTCHAR(start_match); }
4700
4701       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4702       and we are now at a LF, advance the match position by one more character.
4703       */
4704
4705       if (start_match[-1] == '\r' &&
4706            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4707            start_match < end_subject &&
4708            *start_match == '\n')
4709         start_match++;
4710       }
4711     }
4712
4713   /* Or to a non-unique first char after study */
4714
4715   else if (start_bits != NULL)
4716     {
4717     while (start_match < end_subject)
4718       {
4719       register unsigned int c = *start_match;
4720       if ((start_bits[c/8] & (1 << (c&7))) == 0)
4721         { NEXTCHAR(start_match); }
4722       else break;
4723       }
4724     }
4725
4726   /* Restore fudged end_subject */
4727
4728   end_subject = save_end_subject;
4729
4730 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4731   printf(">>>> Match against: ");
4732   pchars(start_match, end_subject - start_match, TRUE, md);
4733   printf("\n");
4734 #endif
4735
4736   /* If req_byte is set, we know that that character must appear in the subject
4737   for the match to succeed. If the first character is set, req_byte must be
4738   later in the subject; otherwise the test starts at the match point. This
4739   optimization can save a huge amount of backtracking in patterns with nested
4740   unlimited repeats that aren't going to match. Writing separate code for
4741   cased/caseless versions makes it go faster, as does using an autoincrement
4742   and backing off on a match.
4743
4744   HOWEVER: when the subject string is very, very long, searching to its end can
4745   take a long time, and give bad performance on quite ordinary patterns. This
4746   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4747   string... so we don't do this when the string is sufficiently long.
4748
4749   ALSO: this processing is disabled when partial matching is requested.
4750   */
4751
4752   if (req_byte >= 0 &&
4753       end_subject - start_match < REQ_BYTE_MAX &&
4754       !md->partial)
4755     {
4756     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4757
4758     /* We don't need to repeat the search if we haven't yet reached the
4759     place we found it at last time. */
4760
4761     if (p > req_byte_ptr)
4762       {
4763       if (req_byte_caseless)
4764         {
4765         while (p < end_subject)
4766           {
4767           register int pp = *p++;
4768           if (pp == req_byte || pp == req_byte2) { p--; break; }
4769           }
4770         }
4771       else
4772         {
4773         while (p < end_subject)
4774           {
4775           if (*p++ == req_byte) { p--; break; }
4776           }
4777         }
4778
4779       /* If we can't find the required character, break the matching loop,
4780       forcing a match failure. */
4781
4782       if (p >= end_subject)
4783         {
4784         rc = MATCH_NOMATCH;
4785         break;
4786         }
4787
4788       /* If we have found the required character, save the point where we
4789       found it, so that we don't search again next time round the loop if
4790       the start hasn't passed this character yet. */
4791
4792       req_byte_ptr = p;
4793       }
4794     }
4795
4796   /* OK, we can now run the match. */
4797
4798   md->start_match_ptr = start_match;
4799   md->match_call_count = 0;
4800   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4801
4802   switch(rc)
4803     {
4804     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4805     exactly like PRUNE. */
4806
4807     case MATCH_NOMATCH:
4808     case MATCH_PRUNE:
4809     case MATCH_THEN:
4810     new_start_match = start_match + 1;
4811 #ifdef SUPPORT_UTF8
4812     if (utf8)
4813       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4814         new_start_match++;
4815 #endif
4816     break;
4817
4818     /* SKIP passes back the next starting point explicitly. */
4819
4820     case MATCH_SKIP:
4821     new_start_match = md->start_match_ptr;
4822     break;
4823
4824     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4825
4826     case MATCH_COMMIT:
4827     rc = MATCH_NOMATCH;
4828     goto ENDLOOP;
4829
4830     /* Any other return is some kind of error. */
4831
4832     default:
4833     goto ENDLOOP;
4834     }
4835
4836   /* Control reaches here for the various types of "no match at this point"
4837   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4838
4839   rc = MATCH_NOMATCH;
4840
4841   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4842   newline in the subject (though it may continue over the newline). Therefore,
4843   if we have just failed to match, starting at a newline, do not continue. */
4844
4845   if (firstline && IS_NEWLINE(start_match)) break;
4846
4847   /* Advance to new matching position */
4848
4849   start_match = new_start_match;
4850
4851   /* Break the loop if the pattern is anchored or if we have passed the end of
4852   the subject. */
4853
4854   if (anchored || start_match > end_subject) break;
4855
4856   /* If we have just passed a CR and we are now at a LF, and the pattern does
4857   not contain any explicit matches for \r or \n, and the newline option is CRLF
4858   or ANY or ANYCRLF, advance the match position by one more character. */
4859
4860   if (start_match[-1] == '\r' &&
4861       start_match < end_subject &&
4862       *start_match == '\n' &&
4863       (re->flags & PCRE_HASCRORLF) == 0 &&
4864         (md->nltype == NLTYPE_ANY ||
4865          md->nltype == NLTYPE_ANYCRLF ||
4866          md->nllen == 2))
4867     start_match++;
4868
4869   }   /* End of for(;;) "bumpalong" loop */
4870
4871 /* ==========================================================================*/
4872
4873 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4874 conditions is true:
4875
4876 (1) The pattern is anchored or the match was failed by (*COMMIT);
4877
4878 (2) We are past the end of the subject;
4879
4880 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4881     this option requests that a match occur at or before the first newline in
4882     the subject.
4883
4884 When we have a match and the offset vector is big enough to deal with any
4885 backreferences, captured substring offsets will already be set up. In the case
4886 where we had to get some local store to hold offsets for backreference
4887 processing, copy those that we can. In this case there need not be overflow if
4888 certain parts of the pattern were not used, even though there are more
4889 capturing parentheses than vector slots. */
4890
4891 ENDLOOP:
4892
4893 if (rc == MATCH_MATCH)
4894   {
4895   if (using_temporary_offsets)
4896     {
4897     if (offsetcount >= 4)
4898       {
4899       memcpy(offsets + 2, md->offset_vector + 2,
4900         (offsetcount - 2) * sizeof(int));
4901       DPRINTF(("Copied offsets from temporary memory\n"));
4902       }
4903     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4904     DPRINTF(("Freeing temporary memory\n"));
4905     (pcre_free)(md->offset_vector);
4906     }
4907
4908   /* Set the return code to the number of captured strings, or 0 if there are
4909   too many to fit into the vector. */
4910
4911   rc = md->offset_overflow? 0 : md->end_offset_top/2;
4912
4913   /* If there is space, set up the whole thing as substring 0. The value of
4914   md->start_match_ptr might be modified if \K was encountered on the successful
4915   matching path. */
4916
4917   if (offsetcount < 2) rc = 0; else
4918     {
4919     offsets[0] = md->start_match_ptr - md->start_subject;
4920     offsets[1] = md->end_match_ptr - md->start_subject;
4921     }
4922
4923   DPRINTF((">>>> returning %d\n", rc));
4924   return rc;
4925   }
4926
4927 /* Control gets here if there has been an error, or if the overall match
4928 attempt has failed at all permitted starting positions. */
4929
4930 if (using_temporary_offsets)
4931   {
4932   DPRINTF(("Freeing temporary memory\n"));
4933   (pcre_free)(md->offset_vector);
4934   }
4935
4936 if (rc != MATCH_NOMATCH)
4937   {
4938   DPRINTF((">>>> error: returning %d\n", rc));
4939   return rc;
4940   }
4941 else if (md->partial && md->hitend)
4942   {
4943   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4944   return PCRE_ERROR_PARTIAL;
4945   }
4946 else
4947   {
4948   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4949   return PCRE_ERROR_NOMATCH;
4950   }
4951 }
4952
4953 /* End of pcre_exec.c */