glib/pcre/pcre_exec.c

   1 /*************************************************
   2 *      Perl-Compatible Regular Expressions       *
   3 *************************************************/
   4
   5 /* PCRE is a library of functions to support regular expressions whose syntax
   6 and semantics are as close as possible to those of the Perl 5 language.
   7
   8                        Written by Philip Hazel
   9            Copyright (c) 1997-2007 University of Cambridge
  10
  11 -----------------------------------------------------------------------------
  12 Redistribution and use in source and binary forms, with or without
  13 modification, are permitted provided that the following conditions are met:
  14
  15     * Redistributions of source code must retain the above copyright notice,
  16       this list of conditions and the following disclaimer.
  17
  18     * Redistributions in binary form must reproduce the above copyright
  19       notice, this list of conditions and the following disclaimer in the
  20       documentation and/or other materials provided with the distribution.
  21
  22     * Neither the name of the University of Cambridge nor the names of its
  23       contributors may be used to endorse or promote products derived from
  24       this software without specific prior written permission.
  25
  26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36 POSSIBILITY OF SUCH DAMAGE.
  37 -----------------------------------------------------------------------------
  38 */
  39
  40
  41 /* This module contains pcre_exec(), the externally visible function that does
  42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  43 possible. There are also some static supporting functions. */
  44
  45 #define NLBLOCK md             /* Block containing newline information */
  46 #define PSSTART start_subject  /* Field containing processed string start */
  47 #define PSEND   end_subject    /* Field containing processed string end */
  48
  49 #include "pcre_internal.h"
  50
  51 /* Undefine some potentially clashing cpp symbols */
  52
  53 #undef min
  54 #undef max
  55
  56 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
  57 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
  58
  59 #define EPTR_WORK_SIZE (1000)
  60
  61 /* Flag bits for the match() function */
  62
  63 #define match_condassert     0x01  /* Called to check a condition assertion */
  64 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
  65 #define match_tail_recursed  0x04  /* Tail recursive call */
  66
  67 /* Non-error returns from the match() function. Error returns are externally
  68 defined PCRE_ERROR_xxx codes, which are all negative. */
  69
  70 #define MATCH_MATCH        1
  71 #define MATCH_NOMATCH      0
  72
  73 /* Maximum number of ints of offset to save on the stack for recursive calls.
  74 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
  75 because the offset vector is always a multiple of 3 long. */
  76
  77 #define REC_STACK_SAVE_MAX 30
  78
  79 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
  80
  81 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
  82 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
  83
  84
  85
  86 #ifdef DEBUG
  87 /*************************************************
  88 *        Debugging function to print chars       *
  89 *************************************************/
  90
  91 /* Print a sequence of chars in printable format, stopping at the end of the
  92 subject if requested.
  93
  94 Arguments:
  95   p           points to characters
  96   length      number to print
  97   is_subject  TRUE if printing from within md->start_subject
  98   md          pointer to matching data block, if is_subject is TRUE
  99
 100 Returns:     nothing
 101 */
 102
 103 static void
 104 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
 105 {
 106 unsigned int c;
 107 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
 108 while (length-- > 0)
 109   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
 110 }
 111 #endif
 112
 113
 114
 115 /*************************************************
 116 *          Match a back-reference                *
 117 *************************************************/
 118
 119 /* If a back reference hasn't been set, the length that is passed is greater
 120 than the number of characters left in the string, so the match fails.
 121
 122 Arguments:
 123   offset      index into the offset vector
 124   eptr        points into the subject
 125   length      length to be matched
 126   md          points to match data block
 127   ims         the ims flags
 128
 129 Returns:      TRUE if matched
 130 */
 131
 132 static BOOL
 133 match_ref(int offset, register USPTR eptr, int length, match_data *md,
 134   unsigned long int ims)
 135 {
 136 USPTR p = md->start_subject + md->offset_vector[offset];
 137
 138 #ifdef DEBUG
 139 if (eptr >= md->end_subject)
 140   printf("matching subject <null>");
 141 else
 142   {
 143   printf("matching subject ");
 144   pchars(eptr, length, TRUE, md);
 145   }
 146 printf(" against backref ");
 147 pchars(p, length, FALSE, md);
 148 printf("\n");
 149 #endif
 150
 151 /* Always fail if not enough characters left */
 152
 153 if (length > md->end_subject - eptr) return FALSE;
 154
 155 /* Separate the caselesss case for speed */
 156
 157 if ((ims & PCRE_CASELESS) != 0)
 158   {
 159   while (length-- > 0)
 160     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
 161   }
 162 else
 163   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
 164
 165 return TRUE;
 166 }
 167
 168
 169
 170 /***************************************************************************
 171 ****************************************************************************
 172                    RECURSION IN THE match() FUNCTION
 173
 174 The match() function is highly recursive, though not every recursive call
 175 increases the recursive depth. Nevertheless, some regular expressions can cause
 176 it to recurse to a great depth. I was writing for Unix, so I just let it call
 177 itself recursively. This uses the stack for saving everything that has to be
 178 saved for a recursive call. On Unix, the stack can be large, and this works
 179 fine.
 180
 181 It turns out that on some non-Unix-like systems there are problems with
 182 programs that use a lot of stack. (This despite the fact that every last chip
 183 has oodles of memory these days, and techniques for extending the stack have
 184 been known for decades.) So....
 185
 186 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 187 calls by keeping local variables that need to be preserved in blocks of memory
 188 obtained from malloc() instead of on the stack. Macros are used to
 189 achieve this so that the actual code doesn't look very different to what it
 190 always used to.
 191
 192 The original heap-recursive code used longjmp(). However, it seems that this
 193 can be very slow on some operating systems. Following a suggestion from Stan
 194 Switzer, the use of longjmp() has been abolished, at the cost of having to
 195 provide a unique number for each call to RMATCH. There is no way of generating
 196 a sequence of numbers at compile time in C. I have given them names, to make
 197 them stand out more clearly.
 198
 199 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
 200 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
 201 tests. Furthermore, not using longjmp() means that local dynamic variables
 202 don't have indeterminate values; this has meant that the frame size can be
 203 reduced because the result can be "passed back" by straight setting of the
 204 variable instead of being passed in the frame.
 205 ****************************************************************************
 206 ***************************************************************************/
 207
 208
 209 /* Numbers for RMATCH calls */
 210
 211 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
 212        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
 213        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
 214        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
 215        RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };
 216
 217
 218 /* These versions of the macros use the stack, as normal. There are debugging
 219 versions and production versions. Note that the "rw" argument of RMATCH isn't
 220 actually used in this definition. */
 221
 222 #ifndef NO_RECURSE
 223 #define REGISTER register
 224
 225 #ifdef DEBUG
 226 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 227   { \
 228   printf("match() called in line %d\n", __LINE__); \
 229   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
 230   printf("to line %d\n", __LINE__); \
 231   }
 232 #define RRETURN(ra) \
 233   { \
 234   printf("match() returned %d from line %d ", ra, __LINE__); \
 235   return ra; \
 236   }
 237 #else
 238 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 239   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
 240 #define RRETURN(ra) return ra
 241 #endif
 242
 243 #else
 244
 245
 246 /* These versions of the macros manage a private stack on the heap. Note that
 247 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
 248 argument of match(), which never changes. */
 249
 250 #define REGISTER
 251
 252 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
 253   {\
 254   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
 255   frame->Xwhere = rw; \
 256   newframe->Xeptr = ra;\
 257   newframe->Xecode = rb;\
 258   newframe->Xmstart = mstart;\
 259   newframe->Xoffset_top = rc;\
 260   newframe->Xims = re;\
 261   newframe->Xeptrb = rf;\
 262   newframe->Xflags = rg;\
 263   newframe->Xrdepth = frame->Xrdepth + 1;\
 264   newframe->Xprevframe = frame;\
 265   frame = newframe;\
 266   DPRINTF(("restarting from line %d\n", __LINE__));\
 267   goto HEAP_RECURSE;\
 268   L_##rw:\
 269   DPRINTF(("jumped back to line %d\n", __LINE__));\
 270   }
 271
 272 #define RRETURN(ra)\
 273   {\
 274   heapframe *newframe = frame;\
 275   frame = newframe->Xprevframe;\
 276   (pcre_stack_free)(newframe);\
 277   if (frame != NULL)\
 278     {\
 279     rrc = ra;\
 280     goto HEAP_RETURN;\
 281     }\
 282   return ra;\
 283   }
 284
 285
 286 /* Structure for remembering the local variables in a private frame */
 287
 288 typedef struct heapframe {
 289   struct heapframe *Xprevframe;
 290
 291   /* Function arguments that may change */
 292
 293   const uschar *Xeptr;
 294   const uschar *Xecode;
 295   const uschar *Xmstart;
 296   int Xoffset_top;
 297   long int Xims;
 298   eptrblock *Xeptrb;
 299   int Xflags;
 300   unsigned int Xrdepth;
 301
 302   /* Function local variables */
 303
 304   const uschar *Xcallpat;
 305   const uschar *Xcharptr;
 306   const uschar *Xdata;
 307   const uschar *Xnext;
 308   const uschar *Xpp;
 309   const uschar *Xprev;
 310   const uschar *Xsaved_eptr;
 311
 312   recursion_info Xnew_recursive;
 313
 314   BOOL Xcur_is_word;
 315   BOOL Xcondition;
 316   BOOL Xprev_is_word;
 317
 318   unsigned long int Xoriginal_ims;
 319
 320 #ifdef SUPPORT_UCP
 321   int Xprop_type;
 322   int Xprop_value;
 323   int Xprop_fail_result;
 324   int Xprop_category;
 325   int Xprop_chartype;
 326   int Xprop_script;
 327   int Xoclength;
 328   uschar Xocchars[8];
 329 #endif
 330
 331   int Xctype;
 332   unsigned int Xfc;
 333   int Xfi;
 334   int Xlength;
 335   int Xmax;
 336   int Xmin;
 337   int Xnumber;
 338   int Xoffset;
 339   int Xop;
 340   int Xsave_capture_last;
 341   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
 342   int Xstacksave[REC_STACK_SAVE_MAX];
 343
 344   eptrblock Xnewptrb;
 345
 346   /* Where to jump back to */
 347
 348   int Xwhere;
 349
 350 } heapframe;
 351
 352 #endif
 353
 354
 355 /***************************************************************************
 356 ***************************************************************************/
 357
 358
 359
 360 /*************************************************
 361 *         Match from current position            *
 362 *************************************************/
 363
 364 /* This function is called recursively in many circumstances. Whenever it
 365 returns a negative (error) response, the outer incarnation must also return the
 366 same response.
 367
 368 Performance note: It might be tempting to extract commonly used fields from the
 369 md structure (e.g. utf8, end_subject) into individual variables to improve
 370 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 371 made performance worse.
 372
 373 Arguments:
 374    eptr        pointer to current character in subject
 375    ecode       pointer to current position in compiled code
 376    mstart      pointer to the current match start position (can be modified
 377                  by encountering \K)
 378    offset_top  current top pointer
 379    md          pointer to "static" info for the match
 380    ims         current /i, /m, and /s options
 381    eptrb       pointer to chain of blocks containing eptr at start of
 382                  brackets - for testing for empty matches
 383    flags       can contain
 384                  match_condassert - this is an assertion condition
 385                  match_cbegroup - this is the start of an unlimited repeat
 386                    group that can match an empty string
 387                  match_tail_recursed - this is a tail_recursed group
 388    rdepth      the recursion depth
 389
 390 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 391                MATCH_NOMATCH if failed to match  )
 392                a negative PCRE_ERROR_xxx value if aborted by an error condition
 393                  (e.g. stopped by repeated call or recursion limit)
 394 */
 395
 396 static int
 397 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
 398   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
 399   int flags, unsigned int rdepth)
 400 {
 401 /* These variables do not need to be preserved over recursion in this function,
 402 so they can be ordinary variables in all cases. Mark some of them with
 403 "register" because they are used a lot in loops. */
 404
 405 register int  rrc;         /* Returns from recursive calls */
 406 register int  i;           /* Used for loops not involving calls to RMATCH() */
 407 register unsigned int c;   /* Character values not kept over RMATCH() calls */
 408 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
 409
 410 BOOL minimize, possessive; /* Quantifier options */
 411
 412 /* When recursion is not being used, all "local" variables that have to be
 413 preserved over calls to RMATCH() are part of a "frame" which is obtained from
 414 heap storage. Set up the top-level frame here; others are obtained from the
 415 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
 416
 417 #ifdef NO_RECURSE
 418 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
 419 frame->Xprevframe = NULL;            /* Marks the top level */
 420
 421 /* Copy in the original argument variables */
 422
 423 frame->Xeptr = eptr;
 424 frame->Xecode = ecode;
 425 frame->Xmstart = mstart;
 426 frame->Xoffset_top = offset_top;
 427 frame->Xims = ims;
 428 frame->Xeptrb = eptrb;
 429 frame->Xflags = flags;
 430 frame->Xrdepth = rdepth;
 431
 432 /* This is where control jumps back to to effect "recursion" */
 433
 434 HEAP_RECURSE:
 435
 436 /* Macros make the argument variables come from the current frame */
 437
 438 #define eptr               frame->Xeptr
 439 #define ecode              frame->Xecode
 440 #define mstart             frame->Xmstart
 441 #define offset_top         frame->Xoffset_top
 442 #define ims                frame->Xims
 443 #define eptrb              frame->Xeptrb
 444 #define flags              frame->Xflags
 445 #define rdepth             frame->Xrdepth
 446
 447 /* Ditto for the local variables */
 448
 449 #ifdef SUPPORT_UTF8
 450 #define charptr            frame->Xcharptr
 451 #endif
 452 #define callpat            frame->Xcallpat
 453 #define data               frame->Xdata
 454 #define next               frame->Xnext
 455 #define pp                 frame->Xpp
 456 #define prev               frame->Xprev
 457 #define saved_eptr         frame->Xsaved_eptr
 458
 459 #define new_recursive      frame->Xnew_recursive
 460
 461 #define cur_is_word        frame->Xcur_is_word
 462 #define condition          frame->Xcondition
 463 #define prev_is_word       frame->Xprev_is_word
 464
 465 #define original_ims       frame->Xoriginal_ims
 466
 467 #ifdef SUPPORT_UCP
 468 #define prop_type          frame->Xprop_type
 469 #define prop_value         frame->Xprop_value
 470 #define prop_fail_result   frame->Xprop_fail_result
 471 #define prop_category      frame->Xprop_category
 472 #define prop_chartype      frame->Xprop_chartype
 473 #define prop_script        frame->Xprop_script
 474 #define oclength           frame->Xoclength
 475 #define occhars            frame->Xocchars
 476 #endif
 477
 478 #define ctype              frame->Xctype
 479 #define fc                 frame->Xfc
 480 #define fi                 frame->Xfi
 481 #define length             frame->Xlength
 482 #define max                frame->Xmax
 483 #define min                frame->Xmin
 484 #define number             frame->Xnumber
 485 #define offset             frame->Xoffset
 486 #define op                 frame->Xop
 487 #define save_capture_last  frame->Xsave_capture_last
 488 #define save_offset1       frame->Xsave_offset1
 489 #define save_offset2       frame->Xsave_offset2
 490 #define save_offset3       frame->Xsave_offset3
 491 #define stacksave          frame->Xstacksave
 492
 493 #define newptrb            frame->Xnewptrb
 494
 495 /* When recursion is being used, local variables are allocated on the stack and
 496 get preserved during recursion in the normal way. In this environment, fi and
 497 i, and fc and c, can be the same variables. */
 498
 499 #else         /* NO_RECURSE not defined */
 500 #define fi i
 501 #define fc c
 502
 503
 504 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
 505 const uschar *charptr;             /* in small blocks of the code. My normal */
 506 #endif                             /* style of coding would have declared    */
 507 const uschar *callpat;             /* them within each of those blocks.      */
 508 const uschar *data;                /* However, in order to accommodate the   */
 509 const uschar *next;                /* version of this code that uses an      */
 510 USPTR         pp;                  /* external "stack" implemented on the    */
 511 const uschar *prev;                /* heap, it is easier to declare them all */
 512 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
 513                                    /* out in a block. The only declarations  */
 514 recursion_info new_recursive;      /* within blocks below are for variables  */
 515                                    /* that do not have to be preserved over  */
 516 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
 517 BOOL condition;
 518 BOOL prev_is_word;
 519
 520 unsigned long int original_ims;
 521
 522 #ifdef SUPPORT_UCP
 523 int prop_type;
 524 int prop_value;
 525 int prop_fail_result;
 526 int prop_category;
 527 int prop_chartype;
 528 int prop_script;
 529 int oclength;
 530 uschar occhars[8];
 531 #endif
 532
 533 int ctype;
 534 int length;
 535 int max;
 536 int min;
 537 int number;
 538 int offset;
 539 int op;
 540 int save_capture_last;
 541 int save_offset1, save_offset2, save_offset3;
 542 int stacksave[REC_STACK_SAVE_MAX];
 543
 544 eptrblock newptrb;
 545 #endif     /* NO_RECURSE */
 546
 547 /* These statements are here to stop the compiler complaining about uninitialized
 548 variables. */
 549
 550 #ifdef SUPPORT_UCP
 551 prop_value = 0;
 552 prop_fail_result = 0;
 553 #endif
 554
 555
 556 /* This label is used for tail recursion, which is used in a few cases even
 557 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 558 used. Thanks to Ian Taylor for noticing this possibility and sending the
 559 original patch. */
 560
 561 TAIL_RECURSE:
 562
 563 /* OK, now we can get on with the real code of the function. Recursive calls
 564 are specified by the macro RMATCH and RRETURN is used to return. When
 565 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 566 and a "return", respectively (possibly with some debugging if DEBUG is
 567 defined). However, RMATCH isn't like a function call because it's quite a
 568 complicated macro. It has to be used in one particular way. This shouldn't,
 569 however, impact performance when true recursion is being used. */
 570
 571 #ifdef SUPPORT_UTF8
 572 utf8 = md->utf8;       /* Local copy of the flag */
 573 #else
 574 utf8 = FALSE;
 575 #endif
 576
 577 /* First check that we haven't called match() too many times, or that we
 578 haven't exceeded the recursive call limit. */
 579
 580 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 581 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 582
 583 original_ims = ims;    /* Save for resetting on ')' */
 584
 585 /* At the start of a group with an unlimited repeat that may match an empty
 586 string, the match_cbegroup flag is set. When this is the case, add the current
 587 subject pointer to the chain of such remembered pointers, to be checked when we
 588 hit the closing ket, in order to break infinite loops that match no characters.
 589 When match() is called in other circumstances, don't add to the chain. If this
 590 is a tail recursion, use a block from the workspace, as the one on the stack is
 591 already used. */
 592
 593 if ((flags & match_cbegroup) != 0)
 594   {
 595   eptrblock *p;
 596   if ((flags & match_tail_recursed) != 0)
 597     {
 598     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
 599     p = md->eptrchain + md->eptrn++;
 600     }
 601   else p = &newptrb;
 602   p->epb_saved_eptr = eptr;
 603   p->epb_prev = eptrb;
 604   eptrb = p;
 605   }
 606
 607 /* Now start processing the opcodes. */
 608
 609 for (;;)
 610   {
 611   minimize = possessive = FALSE;
 612   op = *ecode;
 613
 614   /* For partial matching, remember if we ever hit the end of the subject after
 615   matching at least one subject character. */
 616
 617   if (md->partial &&
 618       eptr >= md->end_subject &&
 619       eptr > mstart)
 620     md->hitend = TRUE;
 621
 622   switch(op)
 623     {
 624     /* Handle a capturing bracket. If there is space in the offset vector, save
 625     the current subject position in the working slot at the top of the vector.
 626     We mustn't change the current values of the data slot, because they may be
 627     set from a previous iteration of this group, and be referred to by a
 628     reference inside the group.
 629
 630     If the bracket fails to match, we need to restore this value and also the
 631     values of the final offsets, in case they were set by a previous iteration
 632     of the same bracket.
 633
 634     If there isn't enough space in the offset vector, treat this as if it were
 635     a non-capturing bracket. Don't worry about setting the flag for the error
 636     case here; that is handled in the code for KET. */
 637
 638     case OP_CBRA:
 639     case OP_SCBRA:
 640     number = GET2(ecode, 1+LINK_SIZE);
 641     offset = number << 1;
 642
 643 #ifdef DEBUG
 644     printf("start bracket %d\n", number);
 645     printf("subject=");
 646     pchars(eptr, 16, TRUE, md);
 647     printf("\n");
 648 #endif
 649
 650     if (offset < md->offset_max)
 651       {
 652       save_offset1 = md->offset_vector[offset];
 653       save_offset2 = md->offset_vector[offset+1];
 654       save_offset3 = md->offset_vector[md->offset_end - number];
 655       save_capture_last = md->capture_last;
 656
 657       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 658       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
 659
 660       flags = (op == OP_SCBRA)? match_cbegroup : 0;
 661       do
 662         {
 663         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 664           ims, eptrb, flags, RM1);
 665         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 666         md->capture_last = save_capture_last;
 667         ecode += GET(ecode, 1);
 668         }
 669       while (*ecode == OP_ALT);
 670
 671       DPRINTF(("bracket %d failed\n", number));
 672
 673       md->offset_vector[offset] = save_offset1;
 674       md->offset_vector[offset+1] = save_offset2;
 675       md->offset_vector[md->offset_end - number] = save_offset3;
 676
 677       RRETURN(MATCH_NOMATCH);
 678       }
 679
 680     /* Insufficient room for saving captured contents. Treat as a non-capturing
 681     bracket. */
 682
 683     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
 684
 685     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
 686     final alternative within the brackets, we would return the result of a
 687     recursive call to match() whatever happened. We can reduce stack usage by
 688     turning this into a tail recursion. */
 689
 690     case OP_BRA:
 691     case OP_SBRA:
 692     DPRINTF(("start non-capturing bracket\n"));
 693     flags = (op >= OP_SBRA)? match_cbegroup : 0;
 694     for (;;)
 695       {
 696       if (ecode[GET(ecode, 1)] != OP_ALT)
 697         {
 698         ecode += _pcre_OP_lengths[*ecode];
 699         flags |= match_tail_recursed;
 700         DPRINTF(("bracket 0 tail recursion\n"));
 701         goto TAIL_RECURSE;
 702         }
 703
 704       /* For non-final alternatives, continue the loop for a NOMATCH result;
 705       otherwise return. */
 706
 707       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 708         eptrb, flags, RM2);
 709       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 710       ecode += GET(ecode, 1);
 711       }
 712     /* Control never reaches here. */
 713
 714     /* Conditional group: compilation checked that there are no more than
 715     two branches. If the condition is false, skipping the first branch takes us
 716     past the end if there is only one branch, but that's OK because that is
 717     exactly what going to the ket would do. As there is only one branch to be
 718     obeyed, we can use tail recursion to avoid using another stack frame. */
 719
 720     case OP_COND:
 721     case OP_SCOND:
 722     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
 723       {
 724       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
 725       condition = md->recursive != NULL &&
 726         (offset == RREF_ANY || offset == md->recursive->group_num);
 727       ecode += condition? 3 : GET(ecode, 1);
 728       }
 729
 730     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
 731       {
 732       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
 733       condition = offset < offset_top && md->offset_vector[offset] >= 0;
 734       ecode += condition? 3 : GET(ecode, 1);
 735       }
 736
 737     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
 738       {
 739       condition = FALSE;
 740       ecode += GET(ecode, 1);
 741       }
 742
 743     /* The condition is an assertion. Call match() to evaluate it - setting
 744     the final argument match_condassert causes it to stop at the end of an
 745     assertion. */
 746
 747     else
 748       {
 749       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
 750           match_condassert, RM3);
 751       if (rrc == MATCH_MATCH)
 752         {
 753         condition = TRUE;
 754         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
 755         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
 756         }
 757       else if (rrc != MATCH_NOMATCH)
 758         {
 759         RRETURN(rrc);         /* Need braces because of following else */
 760         }
 761       else
 762         {
 763         condition = FALSE;
 764         ecode += GET(ecode, 1);
 765         }
 766       }
 767
 768     /* We are now at the branch that is to be obeyed. As there is only one,
 769     we can use tail recursion to avoid using another stack frame. If the second
 770     alternative doesn't exist, we can just plough on. */
 771
 772     if (condition || *ecode == OP_ALT)
 773       {
 774       ecode += 1 + LINK_SIZE;
 775       flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
 776       goto TAIL_RECURSE;
 777       }
 778     else
 779       {
 780       ecode += 1 + LINK_SIZE;
 781       }
 782     break;
 783
 784
 785     /* End of the pattern. If we are in a top-level recursion, we should
 786     restore the offsets appropriately and continue from after the call. */
 787
 788     case OP_END:
 789     if (md->recursive != NULL && md->recursive->group_num == 0)
 790       {
 791       recursion_info *rec = md->recursive;
 792       DPRINTF(("End of pattern in a (?0) recursion\n"));
 793       md->recursive = rec->prevrec;
 794       memmove(md->offset_vector, rec->offset_save,
 795         rec->saved_max * sizeof(int));
 796       mstart = rec->save_start;
 797       ims = original_ims;
 798       ecode = rec->after_call;
 799       break;
 800       }
 801
 802     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
 803     string - backtracking will then try other alternatives, if any. */
 804
 805     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
 806     md->end_match_ptr = eptr;           /* Record where we ended */
 807     md->end_offset_top = offset_top;    /* and how many extracts were taken */
 808     md->start_match_ptr = mstart;  /* and the start (\K can modify) */
 809     RRETURN(MATCH_MATCH);
 810
 811     /* Change option settings */
 812
 813     case OP_OPT:
 814     ims = ecode[1];
 815     ecode += 2;
 816     DPRINTF(("ims set to %02lx\n", ims));
 817     break;
 818
 819     /* Assertion brackets. Check the alternative branches in turn - the
 820     matching won't pass the KET for an assertion. If any one branch matches,
 821     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
 822     start of each branch to move the current point backwards, so the code at
 823     this level is identical to the lookahead case. */
 824
 825     case OP_ASSERT:
 826     case OP_ASSERTBACK:
 827     do
 828       {
 829       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
 830         RM4);
 831       if (rrc == MATCH_MATCH) break;
 832       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 833       ecode += GET(ecode, 1);
 834       }
 835     while (*ecode == OP_ALT);
 836     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
 837
 838     /* If checking an assertion for a condition, return MATCH_MATCH. */
 839
 840     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 841
 842     /* Continue from after the assertion, updating the offsets high water
 843     mark, since extracts may have been taken during the assertion. */
 844
 845     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
 846     ecode += 1 + LINK_SIZE;
 847     offset_top = md->end_offset_top;
 848     continue;
 849
 850     /* Negative assertion: all branches must fail to match */
 851
 852     case OP_ASSERT_NOT:
 853     case OP_ASSERTBACK_NOT:
 854     do
 855       {
 856       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
 857         RM5);
 858       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
 859       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 860       ecode += GET(ecode,1);
 861       }
 862     while (*ecode == OP_ALT);
 863
 864     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 865
 866     ecode += 1 + LINK_SIZE;
 867     continue;
 868
 869     /* Move the subject pointer back. This occurs only at the start of
 870     each branch of a lookbehind assertion. If we are too close to the start to
 871     move back, this match function fails. When working with UTF-8 we move
 872     back a number of characters, not bytes. */
 873
 874     case OP_REVERSE:
 875 #ifdef SUPPORT_UTF8
 876     if (utf8)
 877       {
 878       i = GET(ecode, 1);
 879       while (i-- > 0)
 880         {
 881         eptr--;
 882         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 883         BACKCHAR(eptr)
 884         }
 885       }
 886     else
 887 #endif
 888
 889     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
 890
 891       {
 892       eptr -= GET(ecode, 1);
 893       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 894       }
 895
 896     /* Skip to next op code */
 897
 898     ecode += 1 + LINK_SIZE;
 899     break;
 900
 901     /* The callout item calls an external function, if one is provided, passing
 902     details of the match so far. This is mainly for debugging, though the
 903     function is able to force a failure. */
 904
 905     case OP_CALLOUT:
 906     if (pcre_callout != NULL)
 907       {
 908       pcre_callout_block cb;
 909       cb.version          = 1;   /* Version 1 of the callout block */
 910       cb.callout_number   = ecode[1];
 911       cb.offset_vector    = md->offset_vector;
 912       cb.subject          = (PCRE_SPTR)md->start_subject;
 913       cb.subject_length   = md->end_subject - md->start_subject;
 914       cb.start_match      = mstart - md->start_subject;
 915       cb.current_position = eptr - md->start_subject;
 916       cb.pattern_position = GET(ecode, 2);
 917       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
 918       cb.capture_top      = offset_top/2;
 919       cb.capture_last     = md->capture_last;
 920       cb.callout_data     = md->callout_data;
 921       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
 922       if (rrc < 0) RRETURN(rrc);
 923       }
 924     ecode += 2 + 2*LINK_SIZE;
 925     break;
 926
 927     /* Recursion either matches the current regex, or some subexpression. The
 928     offset data is the offset to the starting bracket from the start of the
 929     whole pattern. (This is so that it works from duplicated subpatterns.)
 930
 931     If there are any capturing brackets started but not finished, we have to
 932     save their starting points and reinstate them after the recursion. However,
 933     we don't know how many such there are (offset_top records the completed
 934     total) so we just have to save all the potential data. There may be up to
 935     65535 such values, which is too large to put on the stack, but using malloc
 936     for small numbers seems expensive. As a compromise, the stack is used when
 937     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
 938     is used. If the malloc fails, the match is abandoned at once: the error
 939     PCRE_ERROR_NOMEMORY is returned through RRETURN, so no partial save of
 940     the offsets is ever attempted.
 941
 942     There are also other values that have to be saved. We use a chained
 943     sequence of blocks that actually live on the stack. Thanks to Robin Houston
 944     for the original version of this logic. */
 945
 946     case OP_RECURSE:
 947       {
 948       callpat = md->start_code + GET(ecode, 1);
 949       new_recursive.group_num = (callpat == md->start_code)? 0 :
 950         GET2(callpat, 1 + LINK_SIZE);
 951
 952       /* Add to "recursing stack" */
 953
 954       new_recursive.prevrec = md->recursive;
 955       md->recursive = &new_recursive;
 956
 957       /* Find where to continue from afterwards */
 958
 959       ecode += 1 + LINK_SIZE;
 960       new_recursive.after_call = ecode;
 961
 962       /* Now save the offset data. */
 963
 964       new_recursive.saved_max = md->offset_end;
 965       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
 966         new_recursive.offset_save = stacksave;
 967       else
 968         {
 969         new_recursive.offset_save =
 970           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
 971         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
 972         }
 973
 974       memcpy(new_recursive.offset_save, md->offset_vector,
 975             new_recursive.saved_max * sizeof(int));
 976       new_recursive.save_start = mstart;
 977       mstart = eptr;
 978
 979       /* OK, now we can do the recursion. For each top-level alternative we
 980       restore the offset and recursion data. */
 981
 982       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
 983       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
 984       do
 985         {
 986         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
 987           md, ims, eptrb, flags, RM6);
 988         if (rrc == MATCH_MATCH)
 989           {
 990           DPRINTF(("Recursion matched\n"));
 991           md->recursive = new_recursive.prevrec;
 992           if (new_recursive.offset_save != stacksave)
 993             (pcre_free)(new_recursive.offset_save);
 994           RRETURN(MATCH_MATCH);
 995           }
 996         else if (rrc != MATCH_NOMATCH)
 997           {
 998           DPRINTF(("Recursion gave error %d\n", rrc));
 999           RRETURN(rrc);
1000           }
1001
1002         md->recursive = &new_recursive;
1003         memcpy(md->offset_vector, new_recursive.offset_save,
1004             new_recursive.saved_max * sizeof(int));
1005         callpat += GET(callpat, 1);
1006         }
1007       while (*callpat == OP_ALT);
1008
1009       DPRINTF(("Recursion didn't match\n"));
1010       md->recursive = new_recursive.prevrec;
1011       if (new_recursive.offset_save != stacksave)
1012         (pcre_free)(new_recursive.offset_save);
1013       RRETURN(MATCH_NOMATCH);
1014       }
1015     /* Control never reaches here */
1016
1017     /* "Once" brackets are like assertion brackets except that after a match,
1018     the point in the subject string is not moved back. Thus there can never be
1019     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1020     Check the alternative branches in turn - the matching won't pass the KET
1021     for this kind of subpattern. If any one branch matches, we carry on as at
1022     the end of a normal bracket, leaving the subject pointer. */
1023
1024     case OP_ONCE:
1025     prev = ecode;
1026     saved_eptr = eptr;
1027
1028     do
1029       {
1030       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1031         eptrb, 0, RM7);
1032       if (rrc == MATCH_MATCH) break;
1033       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1034       ecode += GET(ecode,1);
1035       }
1036     while (*ecode == OP_ALT);
1037
1038     /* If we hit the end of the group (which could be repeated), fail */
1039
1040     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1041
1042     /* Continue as from after the assertion, updating the offsets high water
1043     mark, since extracts may have been taken. */
1044
1045     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1046
1047     offset_top = md->end_offset_top;
1048     eptr = md->end_match_ptr;
1049
1050     /* For a non-repeating ket, just continue at this level. This also
1051     happens for a repeating ket if no characters were matched in the group.
1052     This is the forcible breaking of infinite loops as implemented in Perl
1053     5.005. If there is an options reset, it will get obeyed in the normal
1054     course of events. */
1055
1056     if (*ecode == OP_KET || eptr == saved_eptr)
1057       {
1058       ecode += 1+LINK_SIZE;
1059       break;
1060       }
1061
1062     /* The repeating kets try the rest of the pattern or restart from the
1063     preceding bracket, in the appropriate order. The second "call" of match()
1064     uses tail recursion, to avoid using another stack frame. We need to reset
1065     any options that changed within the bracket before re-running it, so
1066     check the next opcode. */
1067
1068     if (ecode[1+LINK_SIZE] == OP_OPT)
1069       {
1070       ims = (ims & ~PCRE_IMS) | ecode[4];
1071       DPRINTF(("ims set to %02lx at group repeat\n", ims));
1072       }
1073
1074     if (*ecode == OP_KETRMIN)
1075       {
1076       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
1077         RM8);
1078       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1079       ecode = prev;
1080       flags = match_tail_recursed;
1081       goto TAIL_RECURSE;
1082       }
1083     else  /* OP_KETRMAX */
1084       {
1085       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1086       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1087       ecode += 1 + LINK_SIZE;
1088       flags = match_tail_recursed;
1089       goto TAIL_RECURSE;
1090       }
1091     /* Control never gets here */
1092
1093     /* An alternation is the end of a branch; scan along to find the end of the
1094     bracketed group and go to there. */
1095
1096     case OP_ALT:
1097     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1098     break;
1099
1100     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1101     that it may occur zero times. It may repeat infinitely, or not at all -
1102     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1103     repeat limits are compiled as a number of copies, with the optional ones
1104     preceded by BRAZERO or BRAMINZERO. */
1105
1106     case OP_BRAZERO:
1107       {
1108       next = ecode+1;
1109       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1110       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1111       do next += GET(next,1); while (*next == OP_ALT);
1112       ecode = next + 1 + LINK_SIZE;
1113       }
1114     break;
1115
1116     case OP_BRAMINZERO:
1117       {
1118       next = ecode+1;
1119       do next += GET(next, 1); while (*next == OP_ALT);
1120       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1121       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1122       ecode++;
1123       }
1124     break;
1125
1126     /* End of a group, repeated or non-repeating. */
1127
1128     case OP_KET:
1129     case OP_KETRMIN:
1130     case OP_KETRMAX:
1131     prev = ecode - GET(ecode, 1);
1132
1133     /* If this was a group that remembered the subject start, in order to break
1134     infinite repeats of empty string matches, retrieve the subject start from
1135     the chain. Otherwise, set it NULL. */
1136
1137     if (*prev >= OP_SBRA)
1138       {
1139       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1140       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1141       }
1142     else saved_eptr = NULL;
1143
1144     /* If we are at the end of an assertion group, stop matching and return
1145     MATCH_MATCH, but record the current high water mark for use by positive
1146     assertions. Do this also for the "once" (atomic) groups. */
1147
1148     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1149         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1150         *prev == OP_ONCE)
1151       {
1152       md->end_match_ptr = eptr;      /* For ONCE */
1153       md->end_offset_top = offset_top;
1154       RRETURN(MATCH_MATCH);
1155       }
1156
1157     /* For capturing groups we have to check the group number back at the start
1158     and if necessary complete handling an extraction by setting the offsets and
1159     bumping the high water mark. Note that whole-pattern recursion is coded as
1160     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1161     when the OP_END is reached. Other recursion is handled here. */
1162
1163     if (*prev == OP_CBRA || *prev == OP_SCBRA)
1164       {
1165       number = GET2(prev, 1+LINK_SIZE);
1166       offset = number << 1;
1167
1168 #ifdef DEBUG
1169       printf("end bracket %d", number);
1170       printf("\n");
1171 #endif
1172
1173       md->capture_last = number;
1174       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1175         {
1176         md->offset_vector[offset] =
1177           md->offset_vector[md->offset_end - number];
1178         md->offset_vector[offset+1] = eptr - md->start_subject;
1179         if (offset_top <= offset) offset_top = offset + 2;
1180         }
1181
1182       /* Handle a recursively called group. Restore the offsets
1183       appropriately and continue from after the call. */
1184
1185       if (md->recursive != NULL && md->recursive->group_num == number)
1186         {
1187         recursion_info *rec = md->recursive;
1188         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1189         md->recursive = rec->prevrec;
1190         mstart = rec->save_start;
1191         memcpy(md->offset_vector, rec->offset_save,
1192           rec->saved_max * sizeof(int));
1193         ecode = rec->after_call;
1194         ims = original_ims;
1195         break;
1196         }
1197       }
1198
1199     /* For both capturing and non-capturing groups, reset the value of the ims
1200     flags, in case they got changed during the group. */
1201
1202     ims = original_ims;
1203     DPRINTF(("ims reset to %02lx\n", ims));
1204
1205     /* For a non-repeating ket, just continue at this level. This also
1206     happens for a repeating ket if no characters were matched in the group.
1207     This is the forcible breaking of infinite loops as implemented in Perl
1208     5.005. If there is an options reset, it will get obeyed in the normal
1209     course of events. */
1210
1211     if (*ecode == OP_KET || eptr == saved_eptr)
1212       {
1213       ecode += 1 + LINK_SIZE;
1214       break;
1215       }
1216
1217     /* The repeating kets try the rest of the pattern or restart from the
1218     preceding bracket, in the appropriate order. In the second case, we can use
1219     tail recursion to avoid using another stack frame. */
1220
1221     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1222
1223     if (*ecode == OP_KETRMIN)
1224       {
1225       RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
1226         RM12);
1227       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1228       ecode = prev;
1229       flags |= match_tail_recursed;
1230       goto TAIL_RECURSE;
1231       }
1232     else  /* OP_KETRMAX */
1233       {
1234       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1235       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1236       ecode += 1 + LINK_SIZE;
1237       flags = match_tail_recursed;
1238       goto TAIL_RECURSE;
1239       }
1240     /* Control never gets here */
1241
1242     /* Start of subject unless notbol, or after internal newline if multiline */
1243
1244     case OP_CIRC:
1245     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1246     if ((ims & PCRE_MULTILINE) != 0)
1247       {
1248       if (eptr != md->start_subject &&
1249           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1250         RRETURN(MATCH_NOMATCH);
1251       ecode++;
1252       break;
1253       }
1254     /* ... else fall through */
1255
1256     /* Start of subject assertion */
1257
1258     case OP_SOD:
1259     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1260     ecode++;
1261     break;
1262
1263     /* Start of match assertion */
1264
1265     case OP_SOM:
1266     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1267     ecode++;
1268     break;
1269
1270     /* Reset the start of match point */
1271
1272     case OP_SET_SOM:
1273     mstart = eptr;
1274     ecode++;
1275     break;
1276
1277     /* Assert before internal newline if multiline, or before a terminating
1278     newline unless endonly is set, else end of subject unless noteol is set. */
1279
1280     case OP_DOLL:
1281     if ((ims & PCRE_MULTILINE) != 0)
1282       {
1283       if (eptr < md->end_subject)
1284         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1285       else
1286         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1287       ecode++;
1288       break;
1289       }
1290     else
1291       {
1292       if (md->noteol) RRETURN(MATCH_NOMATCH);
1293       if (!md->endonly)
1294         {
1295         if (eptr != md->end_subject &&
1296             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1297           RRETURN(MATCH_NOMATCH);
1298         ecode++;
1299         break;
1300         }
1301       }
1302     /* ... else fall through for endonly */
1303
1304     /* End of subject assertion (\z) */
1305
1306     case OP_EOD:
1307     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1308     ecode++;
1309     break;
1310
1311     /* End of subject or ending \n assertion (\Z) */
1312
1313     case OP_EODN:
1314     if (eptr != md->end_subject &&
1315         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1316       RRETURN(MATCH_NOMATCH);
1317     ecode++;
1318     break;
1319
1320     /* Word boundary assertions */
1321
1322     case OP_NOT_WORD_BOUNDARY:
1323     case OP_WORD_BOUNDARY:
1324       {
1325
1326       /* Find out if the previous and current characters are "word" characters.
1327       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1328       be "non-word" characters. */
1329
1330 #ifdef SUPPORT_UTF8
1331       if (utf8)
1332         {
1333         if (eptr == md->start_subject) prev_is_word = FALSE; else
1334           {
1335           const uschar *lastptr = eptr - 1;
1336           while((*lastptr & 0xc0) == 0x80) lastptr--;
1337           GETCHAR(c, lastptr);
1338           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1339           }
1340         if (eptr >= md->end_subject) cur_is_word = FALSE; else
1341           {
1342           GETCHAR(c, eptr);
1343           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1344           }
1345         }
1346       else
1347 #endif
1348
1349       /* More streamlined when not in UTF-8 mode */
1350
1351         {
1352         prev_is_word = (eptr != md->start_subject) &&
1353           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1354         cur_is_word = (eptr < md->end_subject) &&
1355           ((md->ctypes[*eptr] & ctype_word) != 0);
1356         }
1357
1358       /* Now see if the situation is what we want */
1359
1360       if ((*ecode++ == OP_WORD_BOUNDARY)?
1361            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1362         RRETURN(MATCH_NOMATCH);
1363       }
1364     break;
1365
1366     /* Match a single character type; inline for speed */
1367
1368     case OP_ANY:
1369     if ((ims & PCRE_DOTALL) == 0)
1370       {
1371       if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1372       }
1373     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1374     if (utf8)
1375       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1376     ecode++;
1377     break;
1378
1379     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1380     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1381
1382     case OP_ANYBYTE:
1383     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1384     ecode++;
1385     break;
1386
1387     case OP_NOT_DIGIT:
1388     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1389     GETCHARINCTEST(c, eptr);
1390     if (
1391 #ifdef SUPPORT_UTF8
1392        c < 256 &&
1393 #endif
1394        (md->ctypes[c] & ctype_digit) != 0
1395        )
1396       RRETURN(MATCH_NOMATCH);
1397     ecode++;
1398     break;
1399
1400     case OP_DIGIT:
1401     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1402     GETCHARINCTEST(c, eptr);
1403     if (
1404 #ifdef SUPPORT_UTF8
1405        c >= 256 ||
1406 #endif
1407        (md->ctypes[c] & ctype_digit) == 0
1408        )
1409       RRETURN(MATCH_NOMATCH);
1410     ecode++;
1411     break;
1412
1413     case OP_NOT_WHITESPACE:
1414     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1415     GETCHARINCTEST(c, eptr);
1416     if (
1417 #ifdef SUPPORT_UTF8
1418        c < 256 &&
1419 #endif
1420        (md->ctypes[c] & ctype_space) != 0
1421        )
1422       RRETURN(MATCH_NOMATCH);
1423     ecode++;
1424     break;
1425
1426     case OP_WHITESPACE:
1427     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1428     GETCHARINCTEST(c, eptr);
1429     if (
1430 #ifdef SUPPORT_UTF8
1431        c >= 256 ||
1432 #endif
1433        (md->ctypes[c] & ctype_space) == 0
1434        )
1435       RRETURN(MATCH_NOMATCH);
1436     ecode++;
1437     break;
1438
1439     case OP_NOT_WORDCHAR:
1440     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1441     GETCHARINCTEST(c, eptr);
1442     if (
1443 #ifdef SUPPORT_UTF8
1444        c < 256 &&
1445 #endif
1446        (md->ctypes[c] & ctype_word) != 0
1447        )
1448       RRETURN(MATCH_NOMATCH);
1449     ecode++;
1450     break;
1451
1452     case OP_WORDCHAR:
1453     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1454     GETCHARINCTEST(c, eptr);
1455     if (
1456 #ifdef SUPPORT_UTF8
1457        c >= 256 ||
1458 #endif
1459        (md->ctypes[c] & ctype_word) == 0
1460        )
1461       RRETURN(MATCH_NOMATCH);
1462     ecode++;
1463     break;
1464
1465     case OP_ANYNL:
1466     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1467     GETCHARINCTEST(c, eptr);
1468     switch(c)
1469       {
1470       default: RRETURN(MATCH_NOMATCH);
1471       case 0x000d:
1472       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1473       break;
1474       case 0x000a:
1475       case 0x000b:
1476       case 0x000c:
1477       case 0x0085:
1478       case 0x2028:
1479       case 0x2029:
1480       break;
1481       }
1482     ecode++;
1483     break;
1484
1485     case OP_NOT_HSPACE:
1486     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1487     GETCHARINCTEST(c, eptr);
1488     switch(c)
1489       {
1490       default: break;
1491       case 0x09:      /* HT */
1492       case 0x20:      /* SPACE */
1493       case 0xa0:      /* NBSP */
1494       case 0x1680:    /* OGHAM SPACE MARK */
1495       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1496       case 0x2000:    /* EN QUAD */
1497       case 0x2001:    /* EM QUAD */
1498       case 0x2002:    /* EN SPACE */
1499       case 0x2003:    /* EM SPACE */
1500       case 0x2004:    /* THREE-PER-EM SPACE */
1501       case 0x2005:    /* FOUR-PER-EM SPACE */
1502       case 0x2006:    /* SIX-PER-EM SPACE */
1503       case 0x2007:    /* FIGURE SPACE */
1504       case 0x2008:    /* PUNCTUATION SPACE */
1505       case 0x2009:    /* THIN SPACE */
1506       case 0x200A:    /* HAIR SPACE */
1507       case 0x202f:    /* NARROW NO-BREAK SPACE */
1508       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1509       case 0x3000:    /* IDEOGRAPHIC SPACE */
1510       RRETURN(MATCH_NOMATCH);
1511       }
1512     ecode++;
1513     break;
1514
1515     case OP_HSPACE:
1516     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1517     GETCHARINCTEST(c, eptr);
1518     switch(c)
1519       {
1520       default: RRETURN(MATCH_NOMATCH);
1521       case 0x09:      /* HT */
1522       case 0x20:      /* SPACE */
1523       case 0xa0:      /* NBSP */
1524       case 0x1680:    /* OGHAM SPACE MARK */
1525       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1526       case 0x2000:    /* EN QUAD */
1527       case 0x2001:    /* EM QUAD */
1528       case 0x2002:    /* EN SPACE */
1529       case 0x2003:    /* EM SPACE */
1530       case 0x2004:    /* THREE-PER-EM SPACE */
1531       case 0x2005:    /* FOUR-PER-EM SPACE */
1532       case 0x2006:    /* SIX-PER-EM SPACE */
1533       case 0x2007:    /* FIGURE SPACE */
1534       case 0x2008:    /* PUNCTUATION SPACE */
1535       case 0x2009:    /* THIN SPACE */
1536       case 0x200A:    /* HAIR SPACE */
1537       case 0x202f:    /* NARROW NO-BREAK SPACE */
1538       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1539       case 0x3000:    /* IDEOGRAPHIC SPACE */
1540       break;
1541       }
1542     ecode++;
1543     break;
1544
1545     case OP_NOT_VSPACE:
1546     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1547     GETCHARINCTEST(c, eptr);
1548     switch(c)
1549       {
1550       default: break;
1551       case 0x0a:      /* LF */
1552       case 0x0b:      /* VT */
1553       case 0x0c:      /* FF */
1554       case 0x0d:      /* CR */
1555       case 0x85:      /* NEL */
1556       case 0x2028:    /* LINE SEPARATOR */
1557       case 0x2029:    /* PARAGRAPH SEPARATOR */
1558       RRETURN(MATCH_NOMATCH);
1559       }
1560     ecode++;
1561     break;
1562
1563     case OP_VSPACE:
1564     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1565     GETCHARINCTEST(c, eptr);
1566     switch(c)
1567       {
1568       default: RRETURN(MATCH_NOMATCH);
1569       case 0x0a:      /* LF */
1570       case 0x0b:      /* VT */
1571       case 0x0c:      /* FF */
1572       case 0x0d:      /* CR */
1573       case 0x85:      /* NEL */
1574       case 0x2028:    /* LINE SEPARATOR */
1575       case 0x2029:    /* PARAGRAPH SEPARATOR */
1576       break;
1577       }
1578     ecode++;
1579     break;
1580
1581 #ifdef SUPPORT_UCP
1582     /* Check the next character by Unicode property. We will get here only
1583     if the support is in the binary; otherwise a compile-time error occurs. */
1584
1585     case OP_PROP:
1586     case OP_NOTPROP:
1587     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1588     GETCHARINCTEST(c, eptr);
1589       {
1590       int chartype, script;
1591       int category = _pcre_ucp_findprop(c, &chartype, &script);
1592
1593       switch(ecode[1])
1594         {
1595         case PT_ANY:
1596         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1597         break;
1598
1599         case PT_LAMP:
1600         if ((chartype == ucp_Lu ||
1601              chartype == ucp_Ll ||
1602              chartype == ucp_Lt) == (op == OP_NOTPROP))
1603           RRETURN(MATCH_NOMATCH);
1604          break;
1605
1606         case PT_GC:
1607         if ((ecode[2] != category) == (op == OP_PROP))
1608           RRETURN(MATCH_NOMATCH);
1609         break;
1610
1611         case PT_PC:
1612         if ((ecode[2] != chartype) == (op == OP_PROP))
1613           RRETURN(MATCH_NOMATCH);
1614         break;
1615
1616         case PT_SC:
1617         if ((ecode[2] != script) == (op == OP_PROP))
1618           RRETURN(MATCH_NOMATCH);
1619         break;
1620
1621         default:
1622         RRETURN(PCRE_ERROR_INTERNAL);
1623         }
1624
1625       ecode += 3;
1626       }
1627     break;
1628
1629     /* Match an extended Unicode sequence. We will get here only if the support
1630     is in the binary; otherwise a compile-time error occurs. */
1631
1632     case OP_EXTUNI:
1633     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1634     GETCHARINCTEST(c, eptr);
1635       {
1636       int chartype, script;
1637       int category = _pcre_ucp_findprop(c, &chartype, &script);
1638       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1639       while (eptr < md->end_subject)
1640         {
1641         int len = 1;
1642         if (!utf8) c = *eptr; else
1643           {
1644           GETCHARLEN(c, eptr, len);
1645           }
1646         category = _pcre_ucp_findprop(c, &chartype, &script);
1647         if (category != ucp_M) break;
1648         eptr += len;
1649         }
1650       }
1651     ecode++;
1652     break;
1653 #endif
1654
1655
1656     /* Match a back reference, possibly repeatedly. Look past the end of the
1657     item to see if there is repeat information following. The code is similar
1658     to that for character classes, but repeated for efficiency. Then obey
1659     similar code to character type repeats - written out again for speed.
1660     However, if the referenced string is the empty string, always treat
1661     it as matched, any number of times (otherwise there could be infinite
1662     loops). */
1663
1664     case OP_REF:
1665       {
1666       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1667       ecode += 3;                                 /* Advance past item */
1668
1669       /* If the reference is unset, set the length to be longer than the amount
1670       of subject left; this ensures that every attempt at a match fails. We
1671       can't just fail here, because of the possibility of quantifiers with zero
1672       minima. */
1673
1674       length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1675         md->end_subject - eptr + 1 :
1676         md->offset_vector[offset+1] - md->offset_vector[offset];
1677
1678       /* Set up for repetition, or handle the non-repeated case */
1679
1680       switch (*ecode)
1681         {
1682         case OP_CRSTAR:
1683         case OP_CRMINSTAR:
1684         case OP_CRPLUS:
1685         case OP_CRMINPLUS:
1686         case OP_CRQUERY:
1687         case OP_CRMINQUERY:
1688         c = *ecode++ - OP_CRSTAR;
1689         minimize = (c & 1) != 0;
1690         min = rep_min[c];                 /* Pick up values from tables; */
1691         max = rep_max[c];                 /* zero for max => infinity */
1692         if (max == 0) max = INT_MAX;
1693         break;
1694
1695         case OP_CRRANGE:
1696         case OP_CRMINRANGE:
1697         minimize = (*ecode == OP_CRMINRANGE);
1698         min = GET2(ecode, 1);
1699         max = GET2(ecode, 3);
1700         if (max == 0) max = INT_MAX;
1701         ecode += 5;
1702         break;
1703
1704         default:               /* No repeat follows */
1705         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1706         eptr += length;
1707         continue;              /* With the main loop */
1708         }
1709
1710       /* If the length of the reference is zero, just continue with the
1711       main loop. */
1712
1713       if (length == 0) continue;
1714
1715       /* First, ensure the minimum number of matches are present. We get back
1716       the length of the reference string explicitly rather than passing the
1717       address of eptr, so that eptr can be a register variable. */
1718
1719       for (i = 1; i <= min; i++)
1720         {
1721         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1722         eptr += length;
1723         }
1724
1725       /* If min = max, continue at the same level without recursion.
1726       They are not both allowed to be zero. */
1727
1728       if (min == max) continue;
1729
1730       /* If minimizing, keep trying and advancing the pointer */
1731
1732       if (minimize)
1733         {
1734         for (fi = min;; fi++)
1735           {
1736           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1737           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1738           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1739             RRETURN(MATCH_NOMATCH);
1740           eptr += length;
1741           }
1742         /* Control never gets here */
1743         }
1744
1745       /* If maximizing, find the longest string and work backwards */
1746
1747       else
1748         {
1749         pp = eptr;
1750         for (i = min; i < max; i++)
1751           {
1752           if (!match_ref(offset, eptr, length, md, ims)) break;
1753           eptr += length;
1754           }
1755         while (eptr >= pp)
1756           {
1757           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1758           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1759           eptr -= length;
1760           }
1761         RRETURN(MATCH_NOMATCH);
1762         }
1763       }
1764     /* Control never gets here */
1765
1766
1767
1768     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1769     used when all the characters in the class have values in the range 0-255,
1770     and either the matching is caseful, or the characters are in the range
1771     0-127 when UTF-8 processing is enabled. The only difference between
1772     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1773     encountered.
1774
1775     First, look past the end of the item to see if there is repeat information
1776     following. Then obey similar code to character type repeats - written out
1777     again for speed. */
1778
1779     case OP_NCLASS:
1780     case OP_CLASS:
1781       {
1782       data = ecode + 1;                /* Save for matching */
1783       ecode += 33;                     /* Advance past the item */
1784
1785       switch (*ecode)
1786         {
1787         case OP_CRSTAR:
1788         case OP_CRMINSTAR:
1789         case OP_CRPLUS:
1790         case OP_CRMINPLUS:
1791         case OP_CRQUERY:
1792         case OP_CRMINQUERY:
1793         c = *ecode++ - OP_CRSTAR;
1794         minimize = (c & 1) != 0;
1795         min = rep_min[c];                 /* Pick up values from tables; */
1796         max = rep_max[c];                 /* zero for max => infinity */
1797         if (max == 0) max = INT_MAX;
1798         break;
1799
1800         case OP_CRRANGE:
1801         case OP_CRMINRANGE:
1802         minimize = (*ecode == OP_CRMINRANGE);
1803         min = GET2(ecode, 1);
1804         max = GET2(ecode, 3);
1805         if (max == 0) max = INT_MAX;
1806         ecode += 5;
1807         break;
1808
1809         default:               /* No repeat follows */
1810         min = max = 1;
1811         break;
1812         }
1813
1814       /* First, ensure the minimum number of matches are present. */
1815
1816 #ifdef SUPPORT_UTF8
1817       /* UTF-8 mode */
1818       if (utf8)
1819         {
1820         for (i = 1; i <= min; i++)
1821           {
1822           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1823           GETCHARINC(c, eptr);
1824           if (c > 255)
1825             {
1826             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1827             }
1828           else
1829             {
1830             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1831             }
1832           }
1833         }
1834       else
1835 #endif
1836       /* Not UTF-8 mode */
1837         {
1838         for (i = 1; i <= min; i++)
1839           {
1840           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1841           c = *eptr++;
1842           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1843           }
1844         }
1845
1846       /* If max == min we can continue with the main loop without the
1847       need to recurse. */
1848
1849       if (min == max) continue;
1850
1851       /* If minimizing, keep testing the rest of the expression and advancing
1852       the pointer while it matches the class. */
1853
1854       if (minimize)
1855         {
1856 #ifdef SUPPORT_UTF8
1857         /* UTF-8 mode */
1858         if (utf8)
1859           {
1860           for (fi = min;; fi++)
1861             {
1862             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1863             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1864             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1865             GETCHARINC(c, eptr);
1866             if (c > 255)
1867               {
1868               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1869               }
1870             else
1871               {
1872               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1873               }
1874             }
1875           }
1876         else
1877 #endif
1878         /* Not UTF-8 mode */
1879           {
1880           for (fi = min;; fi++)
1881             {
1882             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1883             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1884             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1885             c = *eptr++;
1886             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1887             }
1888           }
1889         /* Control never gets here */
1890         }
1891
1892       /* If maximizing, find the longest possible run, then work backwards. */
1893
1894       else
1895         {
1896         pp = eptr;
1897
1898 #ifdef SUPPORT_UTF8
1899         /* UTF-8 mode */
1900         if (utf8)
1901           {
1902           for (i = min; i < max; i++)
1903             {
1904             int len = 1;
1905             if (eptr >= md->end_subject) break;
1906             GETCHARLEN(c, eptr, len);
1907             if (c > 255)
1908               {
1909               if (op == OP_CLASS) break;
1910               }
1911             else
1912               {
1913               if ((data[c/8] & (1 << (c&7))) == 0) break;
1914               }
1915             eptr += len;
1916             }
1917           for (;;)
1918             {
1919             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1920             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1921             if (eptr-- == pp) break;        /* Stop if tried at original pos */
1922             BACKCHAR(eptr);
1923             }
1924           }
1925         else
1926 #endif
1927           /* Not UTF-8 mode */
1928           {
1929           for (i = min; i < max; i++)
1930             {
1931             if (eptr >= md->end_subject) break;
1932             c = *eptr;
1933             if ((data[c/8] & (1 << (c&7))) == 0) break;
1934             eptr++;
1935             }
1936           while (eptr >= pp)
1937             {
1938             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1939             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1940             eptr--;
1941             }
1942           }
1943
1944         RRETURN(MATCH_NOMATCH);
1945         }
1946       }
1947     /* Control never gets here */
1948
1949
1950     /* Match an extended character class. This opcode is encountered only
1951     in UTF-8 mode, because that's the only time it is compiled. */
1952
1953 #ifdef SUPPORT_UTF8
1954     case OP_XCLASS:
1955       {
1956       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
1957       ecode += GET(ecode, 1);                      /* Advance past the item */
1958
1959       switch (*ecode)
1960         {
1961         case OP_CRSTAR:
1962         case OP_CRMINSTAR:
1963         case OP_CRPLUS:
1964         case OP_CRMINPLUS:
1965         case OP_CRQUERY:
1966         case OP_CRMINQUERY:
1967         c = *ecode++ - OP_CRSTAR;
1968         minimize = (c & 1) != 0;
1969         min = rep_min[c];                 /* Pick up values from tables; */
1970         max = rep_max[c];                 /* zero for max => infinity */
1971         if (max == 0) max = INT_MAX;
1972         break;
1973
1974         case OP_CRRANGE:
1975         case OP_CRMINRANGE:
1976         minimize = (*ecode == OP_CRMINRANGE);
1977         min = GET2(ecode, 1);
1978         max = GET2(ecode, 3);
1979         if (max == 0) max = INT_MAX;
1980         ecode += 5;
1981         break;
1982
1983         default:               /* No repeat follows */
1984         min = max = 1;
1985         break;
1986         }
1987
1988       /* First, ensure the minimum number of matches are present. */
1989
1990       for (i = 1; i <= min; i++)
1991         {
1992         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1993         GETCHARINC(c, eptr);
1994         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1995         }
1996
1997       /* If max == min we can continue with the main loop without the
1998       need to recurse. */
1999
2000       if (min == max) continue;
2001
2002       /* If minimizing, keep testing the rest of the expression and advancing
2003       the pointer while it matches the class. */
2004
2005       if (minimize)
2006         {
2007         for (fi = min;; fi++)
2008           {
2009           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2010           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2011           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2012           GETCHARINC(c, eptr);
2013           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2014           }
2015         /* Control never gets here */
2016         }
2017
2018       /* If maximizing, find the longest possible run, then work backwards. */
2019
2020       else
2021         {
2022         pp = eptr;
2023         for (i = min; i < max; i++)
2024           {
2025           int len = 1;
2026           if (eptr >= md->end_subject) break;
2027           GETCHARLEN(c, eptr, len);
2028           if (!_pcre_xclass(c, data)) break;
2029           eptr += len;
2030           }
2031         for(;;)
2032           {
2033           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2034           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2035           if (eptr-- == pp) break;        /* Stop if tried at original pos */
2036           BACKCHAR(eptr)
2037           }
2038         RRETURN(MATCH_NOMATCH);
2039         }
2040
2041       /* Control never gets here */
2042       }
2043 #endif    /* End of XCLASS */
2044
2045     /* Match a single character, casefully */
2046
2047     case OP_CHAR:
2048 #ifdef SUPPORT_UTF8
2049     if (utf8)
2050       {
2051       length = 1;
2052       ecode++;
2053       GETCHARLEN(fc, ecode, length);
2054       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2055       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2056       }
2057     else
2058 #endif
2059
2060     /* Non-UTF-8 mode */
2061       {
2062       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2063       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2064       ecode += 2;
2065       }
2066     break;
2067
2068     /* Match a single character, caselessly */
2069
2070     case OP_CHARNC:
2071 #ifdef SUPPORT_UTF8
2072     if (utf8)
2073       {
2074       length = 1;
2075       ecode++;
2076       GETCHARLEN(fc, ecode, length);
2077
2078       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2079
2080       /* If the pattern character's value is < 128, we have only one byte, and
2081       can use the fast lookup table. */
2082
2083       if (fc < 128)
2084         {
2085         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2086         }
2087
2088       /* Otherwise we must pick up the subject character */
2089
2090       else
2091         {
2092         unsigned int dc;
2093         GETCHARINC(dc, eptr);
2094         ecode += length;
2095
2096         /* If we have Unicode property support, we can use it to test the other
2097         case of the character, if there is one. */
2098
2099         if (fc != dc)
2100           {
2101 #ifdef SUPPORT_UCP
2102           if (dc != _pcre_ucp_othercase(fc))
2103 #endif
2104             RRETURN(MATCH_NOMATCH);
2105           }
2106         }
2107       }
2108     else
2109 #endif   /* SUPPORT_UTF8 */
2110
2111     /* Non-UTF-8 mode */
2112       {
2113       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2114       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2115       ecode += 2;
2116       }
2117     break;
2118
2119     /* Match a single character repeatedly. */
2120
2121     case OP_EXACT:
2122     min = max = GET2(ecode, 1);
2123     ecode += 3;
2124     goto REPEATCHAR;
2125
2126     case OP_POSUPTO:
2127     possessive = TRUE;
2128     /* Fall through */
2129
2130     case OP_UPTO:
2131     case OP_MINUPTO:
2132     min = 0;
2133     max = GET2(ecode, 1);
2134     minimize = *ecode == OP_MINUPTO;
2135     ecode += 3;
2136     goto REPEATCHAR;
2137
2138     case OP_POSSTAR:
2139     possessive = TRUE;
2140     min = 0;
2141     max = INT_MAX;
2142     ecode++;
2143     goto REPEATCHAR;
2144
2145     case OP_POSPLUS:
2146     possessive = TRUE;
2147     min = 1;
2148     max = INT_MAX;
2149     ecode++;
2150     goto REPEATCHAR;
2151
2152     case OP_POSQUERY:
2153     possessive = TRUE;
2154     min = 0;
2155     max = 1;
2156     ecode++;
2157     goto REPEATCHAR;
2158
2159     case OP_STAR:
2160     case OP_MINSTAR:
2161     case OP_PLUS:
2162     case OP_MINPLUS:
2163     case OP_QUERY:
2164     case OP_MINQUERY:
2165     c = *ecode++ - OP_STAR;
2166     minimize = (c & 1) != 0;
2167     min = rep_min[c];                 /* Pick up values from tables; */
2168     max = rep_max[c];                 /* zero for max => infinity */
2169     if (max == 0) max = INT_MAX;
2170
2171     /* Common code for all repeated single-character matches. We can give
2172     up quickly if there are fewer than the minimum number of characters left in
2173     the subject. */
2174
2175     REPEATCHAR:
2176 #ifdef SUPPORT_UTF8
2177     if (utf8)
2178       {
2179       length = 1;
2180       charptr = ecode;
2181       GETCHARLEN(fc, ecode, length);
2182       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2183       ecode += length;
2184
2185       /* Handle multibyte character matching specially here. There is
2186       support for caseless matching if UCP support is present. */
2187
2188       if (length > 1)
2189         {
2190 #ifdef SUPPORT_UCP
2191         unsigned int othercase;
2192         if ((ims & PCRE_CASELESS) != 0 &&
2193             (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2194           oclength = _pcre_ord2utf8(othercase, occhars);
2195         else oclength = 0;
2196 #endif  /* SUPPORT_UCP */
2197
2198         for (i = 1; i <= min; i++)
2199           {
2200           if (memcmp(eptr, charptr, length) == 0) eptr += length;
2201 #ifdef SUPPORT_UCP
2202           /* Need braces because of following else */
2203           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2204           else
2205             {
2206             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2207             eptr += oclength;
2208             }
2209 #else   /* without SUPPORT_UCP */
2210           else { RRETURN(MATCH_NOMATCH); }
2211 #endif  /* SUPPORT_UCP */
2212           }
2213
2214         if (min == max) continue;
2215
2216         if (minimize)
2217           {
2218           for (fi = min;; fi++)
2219             {
2220             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2221             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2222             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2223             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2224 #ifdef SUPPORT_UCP
2225             /* Need braces because of following else */
2226             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2227             else
2228               {
2229               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2230               eptr += oclength;
2231               }
2232 #else   /* without SUPPORT_UCP */
2233             else { RRETURN (MATCH_NOMATCH); }
2234 #endif  /* SUPPORT_UCP */
2235             }
2236           /* Control never gets here */
2237           }
2238
2239         else  /* Maximize */
2240           {
2241           pp = eptr;
2242           for (i = min; i < max; i++)
2243             {
2244             if (eptr > md->end_subject - length) break;
2245             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2246 #ifdef SUPPORT_UCP
2247             else if (oclength == 0) break;
2248             else
2249               {
2250               if (memcmp(eptr, occhars, oclength) != 0) break;
2251               eptr += oclength;
2252               }
2253 #else   /* without SUPPORT_UCP */
2254             else break;
2255 #endif  /* SUPPORT_UCP */
2256             }
2257
2258           if (possessive) continue;
2259           for(;;)
2260            {
2261            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2262            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2263            if (eptr == pp) RRETURN(MATCH_NOMATCH);
2264 #ifdef SUPPORT_UCP
2265            eptr--;
2266            BACKCHAR(eptr);
2267 #else   /* without SUPPORT_UCP */
2268            eptr -= length;
2269 #endif  /* SUPPORT_UCP */
2270            }
2271           }
2272         /* Control never gets here */
2273         }
2274
2275       /* If the length of a UTF-8 character is 1, we fall through here, and
2276       obey the code as for non-UTF-8 characters below, though in this case the
2277       value of fc will always be < 128. */
2278       }
2279     else
2280 #endif  /* SUPPORT_UTF8 */
2281
2282     /* When not in UTF-8 mode, load a single-byte character. */
2283       {
2284       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2285       fc = *ecode++;
2286       }
2287
2288     /* The value of fc at this point is always less than 256, though we may or
2289     may not be in UTF-8 mode. The code is duplicated for the caseless and
2290     caseful cases, for speed, since matching characters is likely to be quite
2291     common. First, ensure the minimum number of matches are present. If min =
2292     max, continue at the same level without recursing. Otherwise, if
2293     minimizing, keep trying the rest of the expression and advancing one
2294     matching character if failing, up to the maximum. Alternatively, if
2295     maximizing, find the maximum number of characters and work backwards. */
2296
2297     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2298       max, eptr));
2299
2300     if ((ims & PCRE_CASELESS) != 0)
2301       {
2302       fc = md->lcc[fc];
2303       for (i = 1; i <= min; i++)
2304         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2305       if (min == max) continue;
2306       if (minimize)
2307         {
2308         for (fi = min;; fi++)
2309           {
2310           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2311           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2312           if (fi >= max || eptr >= md->end_subject ||
2313               fc != md->lcc[*eptr++])
2314             RRETURN(MATCH_NOMATCH);
2315           }
2316         /* Control never gets here */
2317         }
2318       else  /* Maximize */
2319         {
2320         pp = eptr;
2321         for (i = min; i < max; i++)
2322           {
2323           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2324           eptr++;
2325           }
2326         if (possessive) continue;
2327         while (eptr >= pp)
2328           {
2329           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2330           eptr--;
2331           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2332           }
2333         RRETURN(MATCH_NOMATCH);
2334         }
2335       /* Control never gets here */
2336       }
2337
2338     /* Caseful comparisons (includes all multi-byte characters) */
2339
2340     else
2341       {
2342       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2343       if (min == max) continue;
2344       if (minimize)
2345         {
2346         for (fi = min;; fi++)
2347           {
2348           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2349           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2350           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2351             RRETURN(MATCH_NOMATCH);
2352           }
2353         /* Control never gets here */
2354         }
2355       else  /* Maximize */
2356         {
2357         pp = eptr;
2358         for (i = min; i < max; i++)
2359           {
2360           if (eptr >= md->end_subject || fc != *eptr) break;
2361           eptr++;
2362           }
2363         if (possessive) continue;
2364         while (eptr >= pp)
2365           {
2366           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2367           eptr--;
2368           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2369           }
2370         RRETURN(MATCH_NOMATCH);
2371         }
2372       }
2373     /* Control never gets here */
2374
2375     /* Match a negated single one-byte character. The character we are
2376     checking can be multibyte. */
2377
2378     case OP_NOT:
2379     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2380     ecode++;
2381     GETCHARINCTEST(c, eptr);
2382     if ((ims & PCRE_CASELESS) != 0)
2383       {
2384 #ifdef SUPPORT_UTF8
2385       if (c < 256)
2386 #endif
2387       c = md->lcc[c];
2388       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2389       }
2390     else
2391       {
2392       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2393       }
2394     break;
2395
2396     /* Match a negated single one-byte character repeatedly. This is almost a
2397     repeat of the code for a repeated single character, but I haven't found a
2398     nice way of commoning these up that doesn't require a test of the
2399     positive/negative option for each character match. Maybe that wouldn't add
2400     very much to the time taken, but character matching *is* what this is all
2401     about... */
2402
2403     case OP_NOTEXACT:
2404     min = max = GET2(ecode, 1);
2405     ecode += 3;
2406     goto REPEATNOTCHAR;
2407
2408     case OP_NOTUPTO:
2409     case OP_NOTMINUPTO:
2410     min = 0;
2411     max = GET2(ecode, 1);
2412     minimize = *ecode == OP_NOTMINUPTO;
2413     ecode += 3;
2414     goto REPEATNOTCHAR;
2415
2416     case OP_NOTPOSSTAR:
2417     possessive = TRUE;
2418     min = 0;
2419     max = INT_MAX;
2420     ecode++;
2421     goto REPEATNOTCHAR;
2422
2423     case OP_NOTPOSPLUS:
2424     possessive = TRUE;
2425     min = 1;
2426     max = INT_MAX;
2427     ecode++;
2428     goto REPEATNOTCHAR;
2429
2430     case OP_NOTPOSQUERY:
2431     possessive = TRUE;
2432     min = 0;
2433     max = 1;
2434     ecode++;
2435     goto REPEATNOTCHAR;
2436
2437     case OP_NOTPOSUPTO:
2438     possessive = TRUE;
2439     min = 0;
2440     max = GET2(ecode, 1);
2441     ecode += 3;
2442     goto REPEATNOTCHAR;
2443
2444     case OP_NOTSTAR:
2445     case OP_NOTMINSTAR:
2446     case OP_NOTPLUS:
2447     case OP_NOTMINPLUS:
2448     case OP_NOTQUERY:
2449     case OP_NOTMINQUERY:
2450     c = *ecode++ - OP_NOTSTAR;
2451     minimize = (c & 1) != 0;
2452     min = rep_min[c];                 /* Pick up values from tables; */
2453     max = rep_max[c];                 /* zero for max => infinity */
2454     if (max == 0) max = INT_MAX;
2455
2456     /* Common code for all repeated single-byte matches. We can give up quickly
2457     if there are fewer than the minimum number of bytes left in the
2458     subject. */
2459
2460     REPEATNOTCHAR:
2461     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2462     fc = *ecode++;
2463
2464     /* The code is duplicated for the caseless and caseful cases, for speed,
2465     since matching characters is likely to be quite common. First, ensure the
2466     minimum number of matches are present. If min = max, continue at the same
2467     level without recursing. Otherwise, if minimizing, keep trying the rest of
2468     the expression and advancing one matching character if failing, up to the
2469     maximum. Alternatively, if maximizing, find the maximum number of
2470     characters and work backwards. */
2471
2472     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2473       max, eptr));
2474
2475     if ((ims & PCRE_CASELESS) != 0)
2476       {
2477       fc = md->lcc[fc];
2478
2479 #ifdef SUPPORT_UTF8
2480       /* UTF-8 mode */
2481       if (utf8)
2482         {
2483         register unsigned int d;
2484         for (i = 1; i <= min; i++)
2485           {
2486           GETCHARINC(d, eptr);
2487           if (d < 256) d = md->lcc[d];
2488           if (fc == d) RRETURN(MATCH_NOMATCH);
2489           }
2490         }
2491       else
2492 #endif
2493
2494       /* Not UTF-8 mode */
2495         {
2496         for (i = 1; i <= min; i++)
2497           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2498         }
2499
2500       if (min == max) continue;
2501
2502       if (minimize)
2503         {
2504 #ifdef SUPPORT_UTF8
2505         /* UTF-8 mode */
2506         if (utf8)
2507           {
2508           register unsigned int d;
2509           for (fi = min;; fi++)
2510             {
2511             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2512             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2513             GETCHARINC(d, eptr);
2514             if (d < 256) d = md->lcc[d];
2515             if (fi >= max || eptr >= md->end_subject || fc == d)
2516               RRETURN(MATCH_NOMATCH);
2517             }
2518           }
2519         else
2520 #endif
2521         /* Not UTF-8 mode */
2522           {
2523           for (fi = min;; fi++)
2524             {
2525             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2526             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2528               RRETURN(MATCH_NOMATCH);
2529             }
2530           }
2531         /* Control never gets here */
2532         }
2533
2534       /* Maximize case */
2535
2536       else
2537         {
2538         pp = eptr;
2539
2540 #ifdef SUPPORT_UTF8
2541         /* UTF-8 mode */
2542         if (utf8)
2543           {
2544           register unsigned int d;
2545           for (i = min; i < max; i++)
2546             {
2547             int len = 1;
2548             if (eptr >= md->end_subject) break;
2549             GETCHARLEN(d, eptr, len);
2550             if (d < 256) d = md->lcc[d];
2551             if (fc == d) break;
2552             eptr += len;
2553             }
2554         if (possessive) continue;
2555         for(;;)
2556             {
2557             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2558             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2559             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2560             BACKCHAR(eptr);
2561             }
2562           }
2563         else
2564 #endif
2565         /* Not UTF-8 mode */
2566           {
2567           for (i = min; i < max; i++)
2568             {
2569             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2570             eptr++;
2571             }
2572           if (possessive) continue;
2573           while (eptr >= pp)
2574             {
2575             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2576             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2577             eptr--;
2578             }
2579           }
2580
2581         RRETURN(MATCH_NOMATCH);
2582         }
2583       /* Control never gets here */
2584       }
2585
2586     /* Caseful comparisons */
2587
2588     else
2589       {
2590 #ifdef SUPPORT_UTF8
2591       /* UTF-8 mode */
2592       if (utf8)
2593         {
2594         register unsigned int d;
2595         for (i = 1; i <= min; i++)
2596           {
2597           GETCHARINC(d, eptr);
2598           if (fc == d) RRETURN(MATCH_NOMATCH);
2599           }
2600         }
2601       else
2602 #endif
2603       /* Not UTF-8 mode */
2604         {
2605         for (i = 1; i <= min; i++)
2606           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2607         }
2608
2609       if (min == max) continue;
2610
2611       if (minimize)
2612         {
2613 #ifdef SUPPORT_UTF8
2614         /* UTF-8 mode */
2615         if (utf8)
2616           {
2617           register unsigned int d;
2618           for (fi = min;; fi++)
2619             {
2620             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2621             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2622             GETCHARINC(d, eptr);
2623             if (fi >= max || eptr >= md->end_subject || fc == d)
2624               RRETURN(MATCH_NOMATCH);
2625             }
2626           }
2627         else
2628 #endif
2629         /* Not UTF-8 mode */
2630           {
2631           for (fi = min;; fi++)
2632             {
2633             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2634             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2635             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2636               RRETURN(MATCH_NOMATCH);
2637             }
2638           }
2639         /* Control never gets here */
2640         }
2641
2642       /* Maximize case */
2643
2644       else
2645         {
2646         pp = eptr;
2647
2648 #ifdef SUPPORT_UTF8
2649         /* UTF-8 mode */
2650         if (utf8)
2651           {
2652           register unsigned int d;
2653           for (i = min; i < max; i++)
2654             {
2655             int len = 1;
2656             if (eptr >= md->end_subject) break;
2657             GETCHARLEN(d, eptr, len);
2658             if (fc == d) break;
2659             eptr += len;
2660             }
2661           if (possessive) continue;
2662           for(;;)
2663             {
2664             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2665             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2666             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2667             BACKCHAR(eptr);
2668             }
2669           }
2670         else
2671 #endif
2672         /* Not UTF-8 mode */
2673           {
2674           for (i = min; i < max; i++)
2675             {
2676             if (eptr >= md->end_subject || fc == *eptr) break;
2677             eptr++;
2678             }
2679           if (possessive) continue;
2680           while (eptr >= pp)
2681             {
2682             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2683             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684             eptr--;
2685             }
2686           }
2687
2688         RRETURN(MATCH_NOMATCH);
2689         }
2690       }
2691     /* Control never gets here */
2692
2693     /* Match a single character type repeatedly; several different opcodes
2694     share code. This is very similar to the code for single characters, but we
2695     repeat it in the interests of efficiency. */
2696
2697     case OP_TYPEEXACT:
2698     min = max = GET2(ecode, 1);
2699     minimize = TRUE;
2700     ecode += 3;
2701     goto REPEATTYPE;
2702
2703     case OP_TYPEUPTO:
2704     case OP_TYPEMINUPTO:
2705     min = 0;
2706     max = GET2(ecode, 1);
2707     minimize = *ecode == OP_TYPEMINUPTO;
2708     ecode += 3;
2709     goto REPEATTYPE;
2710
2711     case OP_TYPEPOSSTAR:
2712     possessive = TRUE;
2713     min = 0;
2714     max = INT_MAX;
2715     ecode++;
2716     goto REPEATTYPE;
2717
2718     case OP_TYPEPOSPLUS:
2719     possessive = TRUE;
2720     min = 1;
2721     max = INT_MAX;
2722     ecode++;
2723     goto REPEATTYPE;
2724
2725     case OP_TYPEPOSQUERY:
2726     possessive = TRUE;
2727     min = 0;
2728     max = 1;
2729     ecode++;
2730     goto REPEATTYPE;
2731
2732     case OP_TYPEPOSUPTO:
2733     possessive = TRUE;
2734     min = 0;
2735     max = GET2(ecode, 1);
2736     ecode += 3;
2737     goto REPEATTYPE;
2738
2739     case OP_TYPESTAR:
2740     case OP_TYPEMINSTAR:
2741     case OP_TYPEPLUS:
2742     case OP_TYPEMINPLUS:
2743     case OP_TYPEQUERY:
2744     case OP_TYPEMINQUERY:
2745     c = *ecode++ - OP_TYPESTAR;
2746     minimize = (c & 1) != 0;
2747     min = rep_min[c];                 /* Pick up values from tables; */
2748     max = rep_max[c];                 /* zero for max => infinity */
2749     if (max == 0) max = INT_MAX;
2750
2751     /* Common code for all repeated single character type matches. Note that
2752     in UTF-8 mode, '.' matches a character of any length, but for the other
2753     character types, the valid characters are all one-byte long. */
2754
2755     REPEATTYPE:
2756     ctype = *ecode++;      /* Code for the character type */
2757
2758 #ifdef SUPPORT_UCP
2759     if (ctype == OP_PROP || ctype == OP_NOTPROP)
2760       {
2761       prop_fail_result = ctype == OP_NOTPROP;
2762       prop_type = *ecode++;
2763       prop_value = *ecode++;
2764       }
2765     else prop_type = -1;
2766 #endif
2767
2768     /* First, ensure the minimum number of matches are present. Use inline
2769     code for maximizing the speed, and do the type test once at the start
2770     (i.e. keep it out of the loop). Also we can test that there are at least
2771     the minimum number of bytes before we start. This isn't as effective in
2772     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2773     is tidier. Also separate the UCP code, which can be the same for both UTF-8
2774     and single-bytes. */
2775
2776     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2777     if (min > 0)
2778       {
2779 #ifdef SUPPORT_UCP
2780       if (prop_type >= 0)
2781         {
2782         switch(prop_type)
2783           {
2784           case PT_ANY:
2785           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2786           for (i = 1; i <= min; i++)
2787             {
2788             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2789             GETCHARINCTEST(c, eptr);
2790             }
2791           break;
2792
2793           case PT_LAMP:
2794           for (i = 1; i <= min; i++)
2795             {
2796             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2797             GETCHARINCTEST(c, eptr);
2798             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2799             if ((prop_chartype == ucp_Lu ||
2800                  prop_chartype == ucp_Ll ||
2801                  prop_chartype == ucp_Lt) == prop_fail_result)
2802               RRETURN(MATCH_NOMATCH);
2803             }
2804           break;
2805
2806           case PT_GC:
2807           for (i = 1; i <= min; i++)
2808             {
2809             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2810             GETCHARINCTEST(c, eptr);
2811             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2812             if ((prop_category == prop_value) == prop_fail_result)
2813               RRETURN(MATCH_NOMATCH);
2814             }
2815           break;
2816
2817           case PT_PC:
2818           for (i = 1; i <= min; i++)
2819             {
2820             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2821             GETCHARINCTEST(c, eptr);
2822             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2823             if ((prop_chartype == prop_value) == prop_fail_result)
2824               RRETURN(MATCH_NOMATCH);
2825             }
2826           break;
2827
2828           case PT_SC:
2829           for (i = 1; i <= min; i++)
2830             {
2831             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2832             GETCHARINCTEST(c, eptr);
2833             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2834             if ((prop_script == prop_value) == prop_fail_result)
2835               RRETURN(MATCH_NOMATCH);
2836             }
2837           break;
2838
2839           default:
2840           RRETURN(PCRE_ERROR_INTERNAL);
2841           }
2842         }
2843
2844       /* Match extended Unicode sequences. We will get here only if the
2845       support is in the binary; otherwise a compile-time error occurs. */
2846
2847       else if (ctype == OP_EXTUNI)
2848         {
2849         for (i = 1; i <= min; i++)
2850           {
2851           GETCHARINCTEST(c, eptr);
2852           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2853           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2854           while (eptr < md->end_subject)
2855             {
2856             int len = 1;
2857             if (!utf8) c = *eptr; else
2858               {
2859               GETCHARLEN(c, eptr, len);
2860               }
2861             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2862             if (prop_category != ucp_M) break;
2863             eptr += len;
2864             }
2865           }
2866         }
2867
2868       else
2869 #endif     /* SUPPORT_UCP */
2870
2871 /* Handle all other cases when the coding is UTF-8 */
2872
2873 #ifdef SUPPORT_UTF8
2874       if (utf8) switch(ctype)
2875         {
2876         case OP_ANY:
2877         for (i = 1; i <= min; i++)
2878           {
2879           if (eptr >= md->end_subject ||
2880                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2881             RRETURN(MATCH_NOMATCH);
2882           eptr++;
2883           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2884           }
2885         break;
2886
2887         case OP_ANYBYTE:
2888         eptr += min;
2889         break;
2890
2891         case OP_ANYNL:
2892         for (i = 1; i <= min; i++)
2893           {
2894           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2895           GETCHARINC(c, eptr);
2896           switch(c)
2897             {
2898             default: RRETURN(MATCH_NOMATCH);
2899             case 0x000d:
2900             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2901             break;
2902             case 0x000a:
2903             case 0x000b:
2904             case 0x000c:
2905             case 0x0085:
2906             case 0x2028:
2907             case 0x2029:
2908             break;
2909             }
2910           }
2911         break;
2912
2913         case OP_NOT_HSPACE:
2914         for (i = 1; i <= min; i++)
2915           {
2916           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2917           GETCHARINC(c, eptr);
2918           switch(c)
2919             {
2920             default: break;
2921             case 0x09:      /* HT */
2922             case 0x20:      /* SPACE */
2923             case 0xa0:      /* NBSP */
2924             case 0x1680:    /* OGHAM SPACE MARK */
2925             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2926             case 0x2000:    /* EN QUAD */
2927             case 0x2001:    /* EM QUAD */
2928             case 0x2002:    /* EN SPACE */
2929             case 0x2003:    /* EM SPACE */
2930             case 0x2004:    /* THREE-PER-EM SPACE */
2931             case 0x2005:    /* FOUR-PER-EM SPACE */
2932             case 0x2006:    /* SIX-PER-EM SPACE */
2933             case 0x2007:    /* FIGURE SPACE */
2934             case 0x2008:    /* PUNCTUATION SPACE */
2935             case 0x2009:    /* THIN SPACE */
2936             case 0x200A:    /* HAIR SPACE */
2937             case 0x202f:    /* NARROW NO-BREAK SPACE */
2938             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2939             case 0x3000:    /* IDEOGRAPHIC SPACE */
2940             RRETURN(MATCH_NOMATCH);
2941             }
2942           }
2943         break;
2944
2945         case OP_HSPACE:
2946         for (i = 1; i <= min; i++)
2947           {
2948           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2949           GETCHARINC(c, eptr);
2950           switch(c)
2951             {
2952             default: RRETURN(MATCH_NOMATCH);
2953             case 0x09:      /* HT */
2954             case 0x20:      /* SPACE */
2955             case 0xa0:      /* NBSP */
2956             case 0x1680:    /* OGHAM SPACE MARK */
2957             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2958             case 0x2000:    /* EN QUAD */
2959             case 0x2001:    /* EM QUAD */
2960             case 0x2002:    /* EN SPACE */
2961             case 0x2003:    /* EM SPACE */
2962             case 0x2004:    /* THREE-PER-EM SPACE */
2963             case 0x2005:    /* FOUR-PER-EM SPACE */
2964             case 0x2006:    /* SIX-PER-EM SPACE */
2965             case 0x2007:    /* FIGURE SPACE */
2966             case 0x2008:    /* PUNCTUATION SPACE */
2967             case 0x2009:    /* THIN SPACE */
2968             case 0x200A:    /* HAIR SPACE */
2969             case 0x202f:    /* NARROW NO-BREAK SPACE */
2970             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2971             case 0x3000:    /* IDEOGRAPHIC SPACE */
2972             break;
2973             }
2974           }
2975         break;
2976
2977         case OP_NOT_VSPACE:
2978         for (i = 1; i <= min; i++)
2979           {
2980           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2981           GETCHARINC(c, eptr);
2982           switch(c)
2983             {
2984             default: break;
2985             case 0x0a:      /* LF */
2986             case 0x0b:      /* VT */
2987             case 0x0c:      /* FF */
2988             case 0x0d:      /* CR */
2989             case 0x85:      /* NEL */
2990             case 0x2028:    /* LINE SEPARATOR */
2991             case 0x2029:    /* PARAGRAPH SEPARATOR */
2992             RRETURN(MATCH_NOMATCH);
2993             }
2994           }
2995         break;
2996
2997         case OP_VSPACE:
2998         for (i = 1; i <= min; i++)
2999           {
3000           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3001           GETCHARINC(c, eptr);
3002           switch(c)
3003             {
3004             default: RRETURN(MATCH_NOMATCH);
3005             case 0x0a:      /* LF */
3006             case 0x0b:      /* VT */
3007             case 0x0c:      /* FF */
3008             case 0x0d:      /* CR */
3009             case 0x85:      /* NEL */
3010             case 0x2028:    /* LINE SEPARATOR */
3011             case 0x2029:    /* PARAGRAPH SEPARATOR */
3012             break;
3013             }
3014           }
3015         break;
3016
3017         case OP_NOT_DIGIT:
3018         for (i = 1; i <= min; i++)
3019           {
3020           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3021           GETCHARINC(c, eptr);
3022           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3023             RRETURN(MATCH_NOMATCH);
3024           }
3025         break;
3026
3027         case OP_DIGIT:
3028         for (i = 1; i <= min; i++)
3029           {
3030           if (eptr >= md->end_subject ||
3031              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3032             RRETURN(MATCH_NOMATCH);
3033           /* No need to skip more bytes - we know it's a 1-byte character */
3034           }
3035         break;
3036
3037         case OP_NOT_WHITESPACE:
3038         for (i = 1; i <= min; i++)
3039           {
3040           if (eptr >= md->end_subject ||
3041              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
3042             RRETURN(MATCH_NOMATCH);
3043           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3044           }
3045         break;
3046
3047         case OP_WHITESPACE:
3048         for (i = 1; i <= min; i++)
3049           {
3050           if (eptr >= md->end_subject ||
3051              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3052             RRETURN(MATCH_NOMATCH);
3053           /* No need to skip more bytes - we know it's a 1-byte character */
3054           }
3055         break;
3056
3057         case OP_NOT_WORDCHAR:
3058         for (i = 1; i <= min; i++)
3059           {
3060           if (eptr >= md->end_subject ||
3061              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
3062             RRETURN(MATCH_NOMATCH);
3063           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3064           }
3065         break;
3066
3067         case OP_WORDCHAR:
3068         for (i = 1; i <= min; i++)
3069           {
3070           if (eptr >= md->end_subject ||
3071              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3072             RRETURN(MATCH_NOMATCH);
3073           /* No need to skip more bytes - we know it's a 1-byte character */
3074           }
3075         break;
3076
3077         default:
3078         RRETURN(PCRE_ERROR_INTERNAL);
3079         }  /* End switch(ctype) */
3080
3081       else
3082 #endif     /* SUPPORT_UTF8 */
3083
3084       /* Code for the non-UTF-8 case for minimum matching of operators other
3085       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3086       number of bytes present, as this was tested above. */
3087
3088       switch(ctype)
3089         {
3090         case OP_ANY:
3091         if ((ims & PCRE_DOTALL) == 0)
3092           {
3093           for (i = 1; i <= min; i++)
3094             {
3095             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3096             eptr++;
3097             }
3098           }
3099         else eptr += min;
3100         break;
3101
3102         case OP_ANYBYTE:
3103         eptr += min;
3104         break;
3105
3106         /* Because of the CRLF case, we can't assume the minimum number of
3107         bytes are present in this case. */
3108
3109         case OP_ANYNL:
3110         for (i = 1; i <= min; i++)
3111           {
3112           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3113           switch(*eptr++)
3114             {
3115             default: RRETURN(MATCH_NOMATCH);
3116             case 0x000d:
3117             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3118             break;
3119             case 0x000a:
3120             case 0x000b:
3121             case 0x000c:
3122             case 0x0085:
3123             break;
3124             }
3125           }
3126         break;
3127
3128         case OP_NOT_HSPACE:
3129         for (i = 1; i <= min; i++)
3130           {
3131           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3132           switch(*eptr++)
3133             {
3134             default: break;
3135             case 0x09:      /* HT */
3136             case 0x20:      /* SPACE */
3137             case 0xa0:      /* NBSP */
3138             RRETURN(MATCH_NOMATCH);
3139             }
3140           }
3141         break;
3142
3143         case OP_HSPACE:
3144         for (i = 1; i <= min; i++)
3145           {
3146           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3147           switch(*eptr++)
3148             {
3149             default: RRETURN(MATCH_NOMATCH);
3150             case 0x09:      /* HT */
3151             case 0x20:      /* SPACE */
3152             case 0xa0:      /* NBSP */
3153             break;
3154             }
3155           }
3156         break;
3157
3158         case OP_NOT_VSPACE:
3159         for (i = 1; i <= min; i++)
3160           {
3161           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3162           switch(*eptr++)
3163             {
3164             default: break;
3165             case 0x0a:      /* LF */
3166             case 0x0b:      /* VT */
3167             case 0x0c:      /* FF */
3168             case 0x0d:      /* CR */
3169             case 0x85:      /* NEL */
3170             RRETURN(MATCH_NOMATCH);
3171             }
3172           }
3173         break;
3174
3175         case OP_VSPACE:
3176         for (i = 1; i <= min; i++)
3177           {
3178           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3179           switch(*eptr++)
3180             {
3181             default: RRETURN(MATCH_NOMATCH);
3182             case 0x0a:      /* LF */
3183             case 0x0b:      /* VT */
3184             case 0x0c:      /* FF */
3185             case 0x0d:      /* CR */
3186             case 0x85:      /* NEL */
3187             break;
3188             }
3189           }
3190         break;
3191
3192         case OP_NOT_DIGIT:
3193         for (i = 1; i <= min; i++)
3194           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3195         break;
3196
3197         case OP_DIGIT:
3198         for (i = 1; i <= min; i++)
3199           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3200         break;
3201
3202         case OP_NOT_WHITESPACE:
3203         for (i = 1; i <= min; i++)
3204           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3205         break;
3206
3207         case OP_WHITESPACE:
3208         for (i = 1; i <= min; i++)
3209           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3210         break;
3211
3212         case OP_NOT_WORDCHAR:
3213         for (i = 1; i <= min; i++)
3214           if ((md->ctypes[*eptr++] & ctype_word) != 0)
3215             RRETURN(MATCH_NOMATCH);
3216         break;
3217
3218         case OP_WORDCHAR:
3219         for (i = 1; i <= min; i++)
3220           if ((md->ctypes[*eptr++] & ctype_word) == 0)
3221             RRETURN(MATCH_NOMATCH);
3222         break;
3223
3224         default:
3225         RRETURN(PCRE_ERROR_INTERNAL);
3226         }
3227       }
3228
3229     /* If min = max, continue at the same level without recursing */
3230
3231     if (min == max) continue;
3232
3233     /* If minimizing, we have to test the rest of the pattern before each
3234     subsequent match. Again, separate the UTF-8 case for speed, and also
3235     separate the UCP cases. */
3236
3237     if (minimize)
3238       {
3239 #ifdef SUPPORT_UCP
3240       if (prop_type >= 0)
3241         {
3242         switch(prop_type)
3243           {
3244           case PT_ANY:
3245           for (fi = min;; fi++)
3246             {
3247             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3248             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3249             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3250             GETCHARINC(c, eptr);
3251             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3252             }
3253           /* Control never gets here */
3254
3255           case PT_LAMP:
3256           for (fi = min;; fi++)
3257             {
3258             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3259             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3260             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3261             GETCHARINC(c, eptr);
3262             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3263             if ((prop_chartype == ucp_Lu ||
3264                  prop_chartype == ucp_Ll ||
3265                  prop_chartype == ucp_Lt) == prop_fail_result)
3266               RRETURN(MATCH_NOMATCH);
3267             }
3268           /* Control never gets here */
3269
3270           case PT_GC:
3271           for (fi = min;; fi++)
3272             {
3273             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3274             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3275             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3276             GETCHARINC(c, eptr);
3277             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3278             if ((prop_category == prop_value) == prop_fail_result)
3279               RRETURN(MATCH_NOMATCH);
3280             }
3281           /* Control never gets here */
3282
3283           case PT_PC:
3284           for (fi = min;; fi++)
3285             {
3286             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3287             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3288             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3289             GETCHARINC(c, eptr);
3290             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3291             if ((prop_chartype == prop_value) == prop_fail_result)
3292               RRETURN(MATCH_NOMATCH);
3293             }
3294           /* Control never gets here */
3295
3296           case PT_SC:
3297           for (fi = min;; fi++)
3298             {
3299             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3300             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3302             GETCHARINC(c, eptr);
3303             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3304             if ((prop_script == prop_value) == prop_fail_result)
3305               RRETURN(MATCH_NOMATCH);
3306             }
3307           /* Control never gets here */
3308
3309           default:
3310           RRETURN(PCRE_ERROR_INTERNAL);
3311           }
3312         }
3313
3314       /* Match extended Unicode sequences. We will get here only if the
3315       support is in the binary; otherwise a compile-time error occurs. */
3316
3317       else if (ctype == OP_EXTUNI)
3318         {
3319         for (fi = min;; fi++)
3320           {
3321           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3322           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3323           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3324           GETCHARINCTEST(c, eptr);
3325           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3326           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3327           while (eptr < md->end_subject)
3328             {
3329             int len = 1;
3330             if (!utf8) c = *eptr; else
3331               {
3332               GETCHARLEN(c, eptr, len);
3333               }
3334             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3335             if (prop_category != ucp_M) break;
3336             eptr += len;
3337             }
3338           }
3339         }
3340
3341       else
3342 #endif     /* SUPPORT_UCP */
3343
3344 #ifdef SUPPORT_UTF8
3345       /* UTF-8 mode */
3346       if (utf8)
3347         {
3348         for (fi = min;; fi++)
3349           {
3350           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3351           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3352           if (fi >= max || eptr >= md->end_subject ||
3353                (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3354                 IS_NEWLINE(eptr)))
3355             RRETURN(MATCH_NOMATCH);
3356
3357           GETCHARINC(c, eptr);
3358           switch(ctype)
3359             {
3360             case OP_ANY:        /* This is the DOTALL case */
3361             break;
3362
3363             case OP_ANYBYTE:
3364             break;
3365
3366             case OP_ANYNL:
3367             switch(c)
3368               {
3369               default: RRETURN(MATCH_NOMATCH);
3370               case 0x000d:
3371               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3372               break;
3373               case 0x000a:
3374               case 0x000b:
3375               case 0x000c:
3376               case 0x0085:
3377               case 0x2028:
3378               case 0x2029:
3379               break;
3380               }
3381             break;
3382
3383             case OP_NOT_HSPACE:
3384             switch(c)
3385               {
3386               default: break;
3387               case 0x09:      /* HT */
3388               case 0x20:      /* SPACE */
3389               case 0xa0:      /* NBSP */
3390               case 0x1680:    /* OGHAM SPACE MARK */
3391               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3392               case 0x2000:    /* EN QUAD */
3393               case 0x2001:    /* EM QUAD */
3394               case 0x2002:    /* EN SPACE */
3395               case 0x2003:    /* EM SPACE */
3396               case 0x2004:    /* THREE-PER-EM SPACE */
3397               case 0x2005:    /* FOUR-PER-EM SPACE */
3398               case 0x2006:    /* SIX-PER-EM SPACE */
3399               case 0x2007:    /* FIGURE SPACE */
3400               case 0x2008:    /* PUNCTUATION SPACE */
3401               case 0x2009:    /* THIN SPACE */
3402               case 0x200A:    /* HAIR SPACE */
3403               case 0x202f:    /* NARROW NO-BREAK SPACE */
3404               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3405               case 0x3000:    /* IDEOGRAPHIC SPACE */
3406               RRETURN(MATCH_NOMATCH);
3407               }
3408             break;
3409
3410             case OP_HSPACE:
3411             switch(c)
3412               {
3413               default: RRETURN(MATCH_NOMATCH);
3414               case 0x09:      /* HT */
3415               case 0x20:      /* SPACE */
3416               case 0xa0:      /* NBSP */
3417               case 0x1680:    /* OGHAM SPACE MARK */
3418               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3419               case 0x2000:    /* EN QUAD */
3420               case 0x2001:    /* EM QUAD */
3421               case 0x2002:    /* EN SPACE */
3422               case 0x2003:    /* EM SPACE */
3423               case 0x2004:    /* THREE-PER-EM SPACE */
3424               case 0x2005:    /* FOUR-PER-EM SPACE */
3425               case 0x2006:    /* SIX-PER-EM SPACE */
3426               case 0x2007:    /* FIGURE SPACE */
3427               case 0x2008:    /* PUNCTUATION SPACE */
3428               case 0x2009:    /* THIN SPACE */
3429               case 0x200A:    /* HAIR SPACE */
3430               case 0x202f:    /* NARROW NO-BREAK SPACE */
3431               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3432               case 0x3000:    /* IDEOGRAPHIC SPACE */
3433               break;
3434               }
3435             break;
3436
3437             case OP_NOT_VSPACE:
3438             switch(c)
3439               {
3440               default: break;
3441               case 0x0a:      /* LF */
3442               case 0x0b:      /* VT */
3443               case 0x0c:      /* FF */
3444               case 0x0d:      /* CR */
3445               case 0x85:      /* NEL */
3446               case 0x2028:    /* LINE SEPARATOR */
3447               case 0x2029:    /* PARAGRAPH SEPARATOR */
3448               RRETURN(MATCH_NOMATCH);
3449               }
3450             break;
3451
3452             case OP_VSPACE:
3453             switch(c)
3454               {
3455               default: RRETURN(MATCH_NOMATCH);
3456               case 0x0a:      /* LF */
3457               case 0x0b:      /* VT */
3458               case 0x0c:      /* FF */
3459               case 0x0d:      /* CR */
3460               case 0x85:      /* NEL */
3461               case 0x2028:    /* LINE SEPARATOR */
3462               case 0x2029:    /* PARAGRAPH SEPARATOR */
3463               break;
3464               }
3465             break;
3466
3467             case OP_NOT_DIGIT:
3468             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3469               RRETURN(MATCH_NOMATCH);
3470             break;
3471
3472             case OP_DIGIT:
3473             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3474               RRETURN(MATCH_NOMATCH);
3475             break;
3476
3477             case OP_NOT_WHITESPACE:
3478             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3479               RRETURN(MATCH_NOMATCH);
3480             break;
3481
3482             case OP_WHITESPACE:
3483             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3484               RRETURN(MATCH_NOMATCH);
3485             break;
3486
3487             case OP_NOT_WORDCHAR:
3488             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3489               RRETURN(MATCH_NOMATCH);
3490             break;
3491
3492             case OP_WORDCHAR:
3493             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3494               RRETURN(MATCH_NOMATCH);
3495             break;
3496
3497             default:
3498             RRETURN(PCRE_ERROR_INTERNAL);
3499             }
3500           }
3501         }
3502       else
3503 #endif
3504       /* Not UTF-8 mode */
3505         {
3506         for (fi = min;; fi++)
3507           {
3508           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3509           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3510           if (fi >= max || eptr >= md->end_subject ||
3511                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3512             RRETURN(MATCH_NOMATCH);
3513
3514           c = *eptr++;
3515           switch(ctype)
3516             {
3517             case OP_ANY:   /* This is the DOTALL case */
3518             break;
3519
3520             case OP_ANYBYTE:
3521             break;
3522
3523             case OP_ANYNL:
3524             switch(c)
3525               {
3526               default: RRETURN(MATCH_NOMATCH);
3527               case 0x000d:
3528               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3529               break;
3530               case 0x000a:
3531               case 0x000b:
3532               case 0x000c:
3533               case 0x0085:
3534               break;
3535               }
3536             break;
3537
3538             case OP_NOT_HSPACE:
3539             switch(c)
3540               {
3541               default: break;
3542               case 0x09:      /* HT */
3543               case 0x20:      /* SPACE */
3544               case 0xa0:      /* NBSP */
3545               RRETURN(MATCH_NOMATCH);
3546               }
3547             break;
3548
3549             case OP_HSPACE:
3550             switch(c)
3551               {
3552               default: RRETURN(MATCH_NOMATCH);
3553               case 0x09:      /* HT */
3554               case 0x20:      /* SPACE */
3555               case 0xa0:      /* NBSP */
3556               break;
3557               }
3558             break;
3559
3560             case OP_NOT_VSPACE:
3561             switch(c)
3562               {
3563               default: break;
3564               case 0x0a:      /* LF */
3565               case 0x0b:      /* VT */
3566               case 0x0c:      /* FF */
3567               case 0x0d:      /* CR */
3568               case 0x85:      /* NEL */
3569               RRETURN(MATCH_NOMATCH);
3570               }
3571             break;
3572
3573             case OP_VSPACE:
3574             switch(c)
3575               {
3576               default: RRETURN(MATCH_NOMATCH);
3577               case 0x0a:      /* LF */
3578               case 0x0b:      /* VT */
3579               case 0x0c:      /* FF */
3580               case 0x0d:      /* CR */
3581               case 0x85:      /* NEL */
3582               break;
3583               }
3584             break;
3585
3586             case OP_NOT_DIGIT:
3587             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3588             break;
3589
3590             case OP_DIGIT:
3591             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3592             break;
3593
3594             case OP_NOT_WHITESPACE:
3595             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3596             break;
3597
3598             case OP_WHITESPACE:
3599             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3600             break;
3601
3602             case OP_NOT_WORDCHAR:
3603             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3604             break;
3605
3606             case OP_WORDCHAR:
3607             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3608             break;
3609
3610             default:
3611             RRETURN(PCRE_ERROR_INTERNAL);
3612             }
3613           }
3614         }
3615       /* Control never gets here */
3616       }
3617
3618     /* If maximizing, it is worth using inline code for speed, doing the type
3619     test once at the start (i.e. keep it out of the loop). Again, keep the
3620     UTF-8 and UCP stuff separate. */
3621
3622     else
3623       {
3624       pp = eptr;  /* Remember where we started */
3625
3626 #ifdef SUPPORT_UCP
3627       if (prop_type >= 0)
3628         {
3629         switch(prop_type)
3630           {
3631           case PT_ANY:
3632           for (i = min; i < max; i++)
3633             {
3634             int len = 1;
3635             if (eptr >= md->end_subject) break;
3636             GETCHARLEN(c, eptr, len);
3637             if (prop_fail_result) break;
3638             eptr+= len;
3639             }
3640           break;
3641
3642           case PT_LAMP:
3643           for (i = min; i < max; i++)
3644             {
3645             int len = 1;
3646             if (eptr >= md->end_subject) break;
3647             GETCHARLEN(c, eptr, len);
3648             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3649             if ((prop_chartype == ucp_Lu ||
3650                  prop_chartype == ucp_Ll ||
3651                  prop_chartype == ucp_Lt) == prop_fail_result)
3652               break;
3653             eptr+= len;
3654             }
3655           break;
3656
3657           case PT_GC:
3658           for (i = min; i < max; i++)
3659             {
3660             int len = 1;
3661             if (eptr >= md->end_subject) break;
3662             GETCHARLEN(c, eptr, len);
3663             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3664             if ((prop_category == prop_value) == prop_fail_result)
3665               break;
3666             eptr+= len;
3667             }
3668           break;
3669
3670           case PT_PC:
3671           for (i = min; i < max; i++)
3672             {
3673             int len = 1;
3674             if (eptr >= md->end_subject) break;
3675             GETCHARLEN(c, eptr, len);
3676             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3677             if ((prop_chartype == prop_value) == prop_fail_result)
3678               break;
3679             eptr+= len;
3680             }
3681           break;
3682
3683           case PT_SC:
3684           for (i = min; i < max; i++)
3685             {
3686             int len = 1;
3687             if (eptr >= md->end_subject) break;
3688             GETCHARLEN(c, eptr, len);
3689             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3690             if ((prop_script == prop_value) == prop_fail_result)
3691               break;
3692             eptr+= len;
3693             }
3694           break;
3695           }
3696
3697         /* eptr is now past the end of the maximum run */
3698
3699         if (possessive) continue;
3700         for(;;)
3701           {
3702           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3703           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3704           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3705           BACKCHAR(eptr);
3706           }
3707         }
3708
3709       /* Match extended Unicode sequences. We will get here only if the
3710       support is in the binary; otherwise a compile-time error occurs. */
3711
3712       else if (ctype == OP_EXTUNI)
3713         {
3714         for (i = min; i < max; i++)
3715           {
3716           if (eptr >= md->end_subject) break;
3717           GETCHARINCTEST(c, eptr);
3718           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3719           if (prop_category == ucp_M) break;
3720           while (eptr < md->end_subject)
3721             {
3722             int len = 1;
3723             if (!utf8) c = *eptr; else
3724               {
3725               GETCHARLEN(c, eptr, len);
3726               }
3727             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3728             if (prop_category != ucp_M) break;
3729             eptr += len;
3730             }
3731           }
3732
3733         /* eptr is now past the end of the maximum run */
3734
3735         if (possessive) continue;
3736         for(;;)
3737           {
3738           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3739           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3740           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3741           for (;;)                        /* Move back over one extended */
3742             {
3743             int len = 1;
3744             BACKCHAR(eptr);
3745             if (!utf8) c = *eptr; else
3746               {
3747               GETCHARLEN(c, eptr, len);
3748               }
3749             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3750             if (prop_category != ucp_M) break;
3751             eptr--;
3752             }
3753           }
3754         }
3755
3756       else
3757 #endif   /* SUPPORT_UCP */
3758
3759 #ifdef SUPPORT_UTF8
3760       /* UTF-8 mode */
3761
3762       if (utf8)
3763         {
3764         switch(ctype)
3765           {
3766           case OP_ANY:
3767
3768           /* Special code is required for UTF8, but when the maximum is
3769           unlimited we don't need it, so we repeat the non-UTF8 code. This is
3770           probably worth it, because .* is quite a common idiom. */
3771
3772           if (max < INT_MAX)
3773             {
3774             if ((ims & PCRE_DOTALL) == 0)
3775               {
3776               for (i = min; i < max; i++)
3777                 {
3778                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3779                 eptr++;
3780                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3781                 }
3782               }
3783             else
3784               {
3785               for (i = min; i < max; i++)
3786                 {
3787                 if (eptr >= md->end_subject) break;
3788                 eptr++;
3789                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3790                 }
3791               }
3792             }
3793
3794           /* Handle unlimited UTF-8 repeat */
3795
3796           else
3797             {
3798             if ((ims & PCRE_DOTALL) == 0)
3799               {
3800               for (i = min; i < max; i++)
3801                 {
3802                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3803                 eptr++;
3804                 }
3805               break;
3806               }
3807             else
3808               {
3809               c = max - min;
3810               if (c > (unsigned int)(md->end_subject - eptr))
3811                 c = md->end_subject - eptr;
3812               eptr += c;
3813               }
3814             }
3815           break;
3816
3817           /* The byte case is the same as non-UTF8 */
3818
3819           case OP_ANYBYTE:
3820           c = max - min;
3821           if (c > (unsigned int)(md->end_subject - eptr))
3822             c = md->end_subject - eptr;
3823           eptr += c;
3824           break;
3825
3826           case OP_ANYNL:
3827           for (i = min; i < max; i++)
3828             {
3829             int len = 1;
3830             if (eptr >= md->end_subject) break;
3831             GETCHARLEN(c, eptr, len);
3832             if (c == 0x000d)
3833               {
3834               if (++eptr >= md->end_subject) break;
3835               if (*eptr == 0x000a) eptr++;
3836               }
3837             else
3838               {
3839               if (c != 0x000a && c != 0x000b && c != 0x000c &&
3840                   c != 0x0085 && c != 0x2028 && c != 0x2029)
3841                 break;
3842               eptr += len;
3843               }
3844             }
3845           break;
3846
3847           case OP_NOT_HSPACE:
3848           case OP_HSPACE:
3849           for (i = min; i < max; i++)
3850             {
3851             BOOL gotspace;
3852             int len = 1;
3853             if (eptr >= md->end_subject) break;
3854             GETCHARLEN(c, eptr, len);
3855             switch(c)
3856               {
3857               default: gotspace = FALSE; break;
3858               case 0x09:      /* HT */
3859               case 0x20:      /* SPACE */
3860               case 0xa0:      /* NBSP */
3861               case 0x1680:    /* OGHAM SPACE MARK */
3862               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3863               case 0x2000:    /* EN QUAD */
3864               case 0x2001:    /* EM QUAD */
3865               case 0x2002:    /* EN SPACE */
3866               case 0x2003:    /* EM SPACE */
3867               case 0x2004:    /* THREE-PER-EM SPACE */
3868               case 0x2005:    /* FOUR-PER-EM SPACE */
3869               case 0x2006:    /* SIX-PER-EM SPACE */
3870               case 0x2007:    /* FIGURE SPACE */
3871               case 0x2008:    /* PUNCTUATION SPACE */
3872               case 0x2009:    /* THIN SPACE */
3873               case 0x200A:    /* HAIR SPACE */
3874               case 0x202f:    /* NARROW NO-BREAK SPACE */
3875               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3876               case 0x3000:    /* IDEOGRAPHIC SPACE */
3877               gotspace = TRUE;
3878               break;
3879               }
3880             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3881             eptr += len;
3882             }
3883           break;
3884
3885           case OP_NOT_VSPACE:
3886           case OP_VSPACE:
3887           for (i = min; i < max; i++)
3888             {
3889             BOOL gotspace;
3890             int len = 1;
3891             if (eptr >= md->end_subject) break;
3892             GETCHARLEN(c, eptr, len);
3893             switch(c)
3894               {
3895               default: gotspace = FALSE; break;
3896               case 0x0a:      /* LF */
3897               case 0x0b:      /* VT */
3898               case 0x0c:      /* FF */
3899               case 0x0d:      /* CR */
3900               case 0x85:      /* NEL */
3901               case 0x2028:    /* LINE SEPARATOR */
3902               case 0x2029:    /* PARAGRAPH SEPARATOR */
3903               gotspace = TRUE;
3904               break;
3905               }
3906             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3907             eptr += len;
3908             }
3909           break;
3910
3911           case OP_NOT_DIGIT:
3912           for (i = min; i < max; i++)
3913             {
3914             int len = 1;
3915             if (eptr >= md->end_subject) break;
3916             GETCHARLEN(c, eptr, len);
3917             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3918             eptr+= len;
3919             }
3920           break;
3921
3922           case OP_DIGIT:
3923           for (i = min; i < max; i++)
3924             {
3925             int len = 1;
3926             if (eptr >= md->end_subject) break;
3927             GETCHARLEN(c, eptr, len);
3928             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3929             eptr+= len;
3930             }
3931           break;
3932
3933           case OP_NOT_WHITESPACE:
3934           for (i = min; i < max; i++)
3935             {
3936             int len = 1;
3937             if (eptr >= md->end_subject) break;
3938             GETCHARLEN(c, eptr, len);
3939             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3940             eptr+= len;
3941             }
3942           break;
3943
3944           case OP_WHITESPACE:
3945           for (i = min; i < max; i++)
3946             {
3947             int len = 1;
3948             if (eptr >= md->end_subject) break;
3949             GETCHARLEN(c, eptr, len);
3950             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3951             eptr+= len;
3952             }
3953           break;
3954
3955           case OP_NOT_WORDCHAR:
3956           for (i = min; i < max; i++)
3957             {
3958             int len = 1;
3959             if (eptr >= md->end_subject) break;
3960             GETCHARLEN(c, eptr, len);
3961             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3962             eptr+= len;
3963             }
3964           break;
3965
3966           case OP_WORDCHAR:
3967           for (i = min; i < max; i++)
3968             {
3969             int len = 1;
3970             if (eptr >= md->end_subject) break;
3971             GETCHARLEN(c, eptr, len);
3972             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3973             eptr+= len;
3974             }
3975           break;
3976
3977           default:
3978           RRETURN(PCRE_ERROR_INTERNAL);
3979           }
3980
3981         /* eptr is now past the end of the maximum run */
3982
3983         if (possessive) continue;
3984         for(;;)
3985           {
3986           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3987           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3988           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3989           BACKCHAR(eptr);
3990           }
3991         }
3992       else
3993 #endif
3994
3995       /* Not UTF-8 mode */
3996         {
3997         switch(ctype)
3998           {
3999           case OP_ANY:
4000           if ((ims & PCRE_DOTALL) == 0)
4001             {
4002             for (i = min; i < max; i++)
4003               {
4004               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4005               eptr++;
4006               }
4007             break;
4008             }
4009           /* For DOTALL case, fall through and treat as \C */
4010
4011           case OP_ANYBYTE:
4012           c = max - min;
4013           if (c > (unsigned int)(md->end_subject - eptr))
4014             c = md->end_subject - eptr;
4015           eptr += c;
4016           break;
4017
4018           case OP_ANYNL:
4019           for (i = min; i < max; i++)
4020             {
4021             if (eptr >= md->end_subject) break;
4022             c = *eptr;
4023             if (c == 0x000d)
4024               {
4025               if (++eptr >= md->end_subject) break;
4026               if (*eptr == 0x000a) eptr++;
4027               }
4028             else
4029               {
4030               if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4031                 break;
4032               eptr++;
4033               }
4034             }
4035           break;
4036
4037           case OP_NOT_HSPACE:
4038           for (i = min; i < max; i++)
4039             {
4040             if (eptr >= md->end_subject) break;
4041             c = *eptr;
4042             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4043             eptr++;
4044             }
4045           break;
4046
4047           case OP_HSPACE:
4048           for (i = min; i < max; i++)
4049             {
4050             if (eptr >= md->end_subject) break;
4051             c = *eptr;
4052             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4053             eptr++;
4054             }
4055           break;
4056
4057           case OP_NOT_VSPACE:
4058           for (i = min; i < max; i++)
4059             {
4060             if (eptr >= md->end_subject) break;
4061             c = *eptr;
4062             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4063               break;
4064             eptr++;
4065             }
4066           break;
4067
4068           case OP_VSPACE:
4069           for (i = min; i < max; i++)
4070             {
4071             if (eptr >= md->end_subject) break;
4072             c = *eptr;
4073             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4074               break;
4075             eptr++;
4076             }
4077           break;
4078
4079           case OP_NOT_DIGIT:
4080           for (i = min; i < max; i++)
4081             {
4082             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4083               break;
4084             eptr++;
4085             }
4086           break;
4087
4088           case OP_DIGIT:
4089           for (i = min; i < max; i++)
4090             {
4091             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4092               break;
4093             eptr++;
4094             }
4095           break;
4096
4097           case OP_NOT_WHITESPACE:
4098           for (i = min; i < max; i++)
4099             {
4100             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4101               break;
4102             eptr++;
4103             }
4104           break;
4105
4106           case OP_WHITESPACE:
4107           for (i = min; i < max; i++)
4108             {
4109             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4110               break;
4111             eptr++;
4112             }
4113           break;
4114
4115           case OP_NOT_WORDCHAR:
4116           for (i = min; i < max; i++)
4117             {
4118             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4119               break;
4120             eptr++;
4121             }
4122           break;
4123
4124           case OP_WORDCHAR:
4125           for (i = min; i < max; i++)
4126             {
4127             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4128               break;
4129             eptr++;
4130             }
4131           break;
4132
4133           default:
4134           RRETURN(PCRE_ERROR_INTERNAL);
4135           }
4136
4137         /* eptr is now past the end of the maximum run */
4138
4139         if (possessive) continue;
4140         while (eptr >= pp)
4141           {
4142           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4143           eptr--;
4144           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4145           }
4146         }
4147
4148       /* Get here if we can't make it match with any permitted repetitions */
4149
4150       RRETURN(MATCH_NOMATCH);
4151       }
4152     /* Control never gets here */
4153
4154     /* There's been some horrible disaster. Arrival here can only mean there is
4155     something seriously wrong in the code above or the OP_xxx definitions. */
4156
4157     default:
4158     DPRINTF(("Unknown opcode %d\n", *ecode));
4159     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4160     }
4161
4162   /* Do not stick any code in here without much thought; it is assumed
4163   that "continue" in the code above comes out to here to repeat the main
4164   loop. */
4165
4166   }             /* End of main loop */
4167 /* Control never reaches here */
4168
4169
4170 /* When compiling to use the heap rather than the stack for recursive calls to
4171 match(), the RRETURN() macro jumps here. The number that is saved in
4172 frame->Xwhere indicates which label we actually want to return to. */
4173
4174 #ifdef NO_RECURSE
4175 #define LBL(val) case val: goto L_RM##val;
4176 HEAP_RETURN:
4177 switch (frame->Xwhere)
4178   {
4179   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4180   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4181   LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4182   LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4183   LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4184   LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4185   default:
4186   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4187   return PCRE_ERROR_INTERNAL;
4188   }
4189 #undef LBL
4190 #endif  /* NO_RECURSE */
4191 }
4192
4193
4194 /***************************************************************************
4195 ****************************************************************************
4196                    RECURSION IN THE match() FUNCTION
4197
4198 Undefine all the macros that were defined above to handle this. */
4199
4200 #ifdef NO_RECURSE  /* these names are macros only in the heap build */
4201 #undef eptr
4202 #undef ecode
4203 #undef mstart
4204 #undef offset_top
4205 #undef ims
4206 #undef eptrb
4207 #undef flags
4208
4209 #undef callpat
4210 #undef charptr
4211 #undef data
4212 #undef next
4213 #undef pp
4214 #undef prev
4215 #undef saved_eptr
4216
4217 #undef new_recursive
4218
4219 #undef cur_is_word
4220 #undef condition
4221 #undef prev_is_word
4222
4223 #undef original_ims
4224
4225 #undef ctype
4226 #undef length
4227 #undef max
4228 #undef min
4229 #undef number
4230 #undef offset
4231 #undef op
4232 #undef save_capture_last
4233 #undef save_offset1
4234 #undef save_offset2
4235 #undef save_offset3
4236 #undef stacksave
4237
4238 #undef newptrb
4239 /* Note: pcre_exec() below uses ims, length and flags as ordinary names. */
4240 #endif  /* NO_RECURSE */
4241
4242 /* fc and fi are macros whether or not NO_RECURSE is defined, so they are
4243 always undefined here, outside the conditional section above. */
4244 #undef fc
4245 #undef fi
4246
4247 /***************************************************************************
4248 ***************************************************************************/
4249
4250
4251
4252 /*************************************************
4253 *         Execute a Regular Expression           *
4254 *************************************************/
4255
4256 /* This function applies a compiled re to a subject string and picks out
4257 portions of the string if it matches. Two elements in the vector are set for
4258 each substring: the offsets to the start and end of the substring.
4259
4260 Arguments:
4261   argument_re     points to the compiled expression
4262   extra_data      points to extra data or is NULL
4263   subject         points to the subject string
4264   length          length of subject string (may contain binary zeros)
4265   start_offset    where to start in the subject string
4266   options         option bits
4267   offsets         points to a vector of ints to be filled in with offsets
4268   offsetcount     the number of elements in the vector
4269
4270 Returns:          > 0 => success; value is the number of elements filled in
4271                   = 0 => success, but offsets is not big enough
4272                    -1 => failed to match
4273                  < -1 => some kind of unexpected problem
4274 */
4275
4276 PCRE_EXP_DEFN int
4277 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4278   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4279   int offsetcount)
4280 {
4281 int rc, resetcount, ocount;
4282 int first_byte = -1;
4283 int req_byte = -1;
4284 int req_byte2 = -1;
4285 int newline;
4286 unsigned long int ims;
4287 BOOL using_temporary_offsets = FALSE;
4288 BOOL anchored;
4289 BOOL startline;
4290 BOOL firstline;
4291 BOOL first_byte_caseless = FALSE;
4292 BOOL req_byte_caseless = FALSE;
4293 BOOL utf8;
4294 match_data match_block;
4295 match_data *md = &match_block;
4296 const uschar *tables;
4297 const uschar *start_bits = NULL;
4298 USPTR start_match = (USPTR)subject + start_offset;
4299 USPTR end_subject;
4300 USPTR req_byte_ptr = start_match - 1;
4301 eptrblock eptrchain[EPTR_WORK_SIZE];
4302
4303 pcre_study_data internal_study;
4304 const pcre_study_data *study;
4305
4306 real_pcre internal_re;
4307 const real_pcre *external_re = (const real_pcre *)argument_re;
4308 const real_pcre *re = external_re;
4309
4310 /* Plausibility checks */
4311
4312 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4313 if (re == NULL || subject == NULL ||
4314    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4315 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4316
4317 /* Fish out the optional data from the extra_data structure, first setting
4318 the default values. */
4319
4320 study = NULL;
4321 md->match_limit = MATCH_LIMIT;
4322 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4323 md->callout_data = NULL;
4324
4325 /* The table pointer is always in native byte order. */
4326
4327 tables = external_re->tables;
4328
4329 if (extra_data != NULL)
4330   {
4331   register unsigned int flags = extra_data->flags;
4332   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4333     study = (const pcre_study_data *)extra_data->study_data;
4334   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4335     md->match_limit = extra_data->match_limit;
4336   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4337     md->match_limit_recursion = extra_data->match_limit_recursion;
4338   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4339     md->callout_data = extra_data->callout_data;
4340   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4341   }
4342
4343 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
4344 is a feature that makes it possible to save compiled regexes and re-use them
4345 in other programs later. */
4346
4347 if (tables == NULL) tables = _pcre_default_tables;
4348
4349 /* Check that the first field in the block is the magic number. If it is not,
4350 test for a regex that was compiled on a host of opposite endianness. If this is
4351 the case, flipped values are put in internal_re and internal_study if there was
4352 study data too. */
4353
4354 if (re->magic_number != MAGIC_NUMBER)
4355   {
4356   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4357   if (re == NULL) return PCRE_ERROR_BADMAGIC;
4358   if (study != NULL) study = &internal_study;
4359   }
4360
4361 /* Set up other data */
4362
4363 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4364 startline = (re->options & PCRE_STARTLINE) != 0;
4365 firstline = (re->options & PCRE_FIRSTLINE) != 0;
4366
4367 /* The code starts after the real_pcre block and the capture name table. */
4368
4369 md->start_code = (const uschar *)external_re + re->name_table_offset +
4370   re->name_count * re->name_entry_size;
4371
4372 md->start_subject = (USPTR)subject;
4373 md->start_offset = start_offset;
4374 md->end_subject = md->start_subject + length;
4375 end_subject = md->end_subject;
4376
4377 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4378 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4379
4380 md->notbol = (options & PCRE_NOTBOL) != 0;
4381 md->noteol = (options & PCRE_NOTEOL) != 0;
4382 md->notempty = (options & PCRE_NOTEMPTY) != 0;
4383 md->partial = (options & PCRE_PARTIAL) != 0;
4384 md->hitend = FALSE;
4385
4386 md->recursive = NULL;                   /* No recursion at top level */
4387 md->eptrchain = eptrchain;              /* Make workspace generally available */
4388
4389 md->lcc = tables + lcc_offset;
4390 md->ctypes = tables + ctypes_offset;
4391
4392 /* Handle different types of newline. The three bits give eight cases. If
4393 nothing is set at run time, whatever was used at compile time applies. */
4394
4395 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4396        PCRE_NEWLINE_BITS)
4397   {
4398   case 0: newline = NEWLINE; break;   /* Compile-time default */
4399   case PCRE_NEWLINE_CR: newline = '\r'; break;
4400   case PCRE_NEWLINE_LF: newline = '\n'; break;
4401   case PCRE_NEWLINE_CR+
4402        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4403   case PCRE_NEWLINE_ANY: newline = -1; break;
4404   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4405   default: return PCRE_ERROR_BADNEWLINE;
4406   }
4407
4408 if (newline == -2)
4409   {
4410   md->nltype = NLTYPE_ANYCRLF;
4411   }
4412 else if (newline < 0)
4413   {
4414   md->nltype = NLTYPE_ANY;
4415   }
4416 else
4417   {
4418   md->nltype = NLTYPE_FIXED;
4419   if (newline > 255)
4420     {
4421     md->nllen = 2;
4422     md->nl[0] = (newline >> 8) & 255;
4423     md->nl[1] = newline & 255;
4424     }
4425   else
4426     {
4427     md->nllen = 1;
4428     md->nl[0] = newline;
4429     }
4430   }
4431
4432 /* Partial matching is supported only for a restricted set of regexes at the
4433 moment. */
4434
4435 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4436   return PCRE_ERROR_BADPARTIAL;
4437
4438 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4439 back the character offset. */
4440
4441 #ifdef SUPPORT_UTF8
4442 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4443   {
4444   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4445     return PCRE_ERROR_BADUTF8;
4446   if (start_offset > 0 && start_offset < length)
4447     {
4448     int tb = ((uschar *)subject)[start_offset];
4449     if (tb > 127)
4450       {
4451       tb &= 0xc0;
4452       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4453       }
4454     }
4455   }
4456 #endif
4457
4458 /* The ims options can vary during the matching as a result of the presence
4459 of (?ims) items in the pattern. They are kept in a local variable so that
4460 restoring at the exit of a group is easy. */
4461
4462 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4463
4464 /* If the expression has got more back references than the offsets supplied can
4465 hold, we get a temporary chunk of working store to use during the matching.
4466 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4467 of 3. */
4468
4469 ocount = offsetcount - (offsetcount % 3);
4470
4471 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4472   {
4473   ocount = re->top_backref * 3 + 3;
4474   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4475   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4476   using_temporary_offsets = TRUE;
4477   DPRINTF(("Got memory to hold back references\n"));
4478   }
4479 else md->offset_vector = offsets;
4480
4481 md->offset_end = ocount;
4482 md->offset_max = (2*ocount)/3;
4483 md->offset_overflow = FALSE;
4484 md->capture_last = -1;
4485
4486 /* Compute the minimum number of offsets that we need to reset each time. Doing
4487 this makes a huge difference to execution time when there aren't many brackets
4488 in the pattern. */
4489
4490 resetcount = 2 + re->top_bracket * 2;
4491 if (resetcount > offsetcount) resetcount = ocount;
4492
4493 /* Reset the working variable associated with each extraction. These should
4494 never be used unless previously set, but they get saved and restored, and so we
4495 initialize them to avoid reading uninitialized locations. */
4496
4497 if (md->offset_vector != NULL)
4498   {
4499   register int *iptr = md->offset_vector + ocount;
4500   register int *iend = iptr - resetcount/2 + 1;
4501   while (--iptr >= iend) *iptr = -1;
4502   }
4503
4504 /* Set up the first character to match, if available. The first_byte value is
4505 never set for an anchored regular expression, but the anchoring may be forced
4506 at run time, so we have to test for anchoring. The first char may be unset for
4507 an unanchored pattern, of course. If there's no first char and the pattern was
4508 studied, there may be a bitmap of possible first characters. */
4509
4510 if (!anchored)
4511   {
4512   if ((re->options & PCRE_FIRSTSET) != 0)
4513     {
4514     first_byte = re->first_byte & 255;
4515     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4516       first_byte = md->lcc[first_byte];
4517     }
4518   else
4519     if (!startline && study != NULL &&
4520       (study->options & PCRE_STUDY_MAPPED) != 0)
4521         start_bits = study->start_bits;
4522   }
4523
4524 /* For anchored or unanchored matches, there may be a "last known required
4525 character" set. */
4526
4527 if ((re->options & PCRE_REQCHSET) != 0)
4528   {
4529   req_byte = re->req_byte & 255;
4530   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4531   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4532   }
4533
4534
4535 /* ==========================================================================*/
4536
4537 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4538 the loop runs just once. */
4539
4540 for(;;)
4541   {
4542   USPTR save_end_subject = end_subject;
4543
4544   /* Reset the maximum number of extractions we might see. */
4545
4546   if (md->offset_vector != NULL)
4547     {
4548     register int *iptr = md->offset_vector;
4549     register int *iend = iptr + resetcount;
4550     while (iptr < iend) *iptr++ = -1;
4551     }
4552
4553   /* Advance to a unique first char if possible. If firstline is TRUE, the
4554   start of the match is constrained to the first line of a multiline string.
4555   That is, the match must be before or at the first newline. Implement this by
4556   temporarily adjusting end_subject so that we stop scanning at a newline. If
4557   the match fails at the newline, later code breaks this loop. */
4558
4559   if (firstline)
4560     {
4561     USPTR t = start_match;
4562     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4563     end_subject = t;
4564     }
4565
4566   /* Now test for a unique first byte */
4567
4568   if (first_byte >= 0)
4569     {
4570     if (first_byte_caseless)
4571       while (start_match < end_subject &&
4572              md->lcc[*start_match] != first_byte)
4573         start_match++;
4574     else
4575       while (start_match < end_subject && *start_match != first_byte)
4576         start_match++;
4577     }
4578
4579   /* Or to just after a linebreak for a multiline match if possible */
4580
4581   else if (startline)
4582     {
4583     if (start_match > md->start_subject + start_offset)
4584       {
4585       while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4586         start_match++;
4587
4588       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4589       and we are now at a LF, advance the match position by one more character.
4590       */
4591
4592       if (start_match[-1] == '\r' &&
4593            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4594            start_match < end_subject &&
4595            *start_match == '\n')
4596         start_match++;
4597       }
4598     }
4599
4600   /* Or to a non-unique first char after study */
4601
4602   else if (start_bits != NULL)
4603     {
4604     while (start_match < end_subject)
4605       {
4606       register unsigned int c = *start_match;
4607       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4608       }
4609     }
4610
4611   /* Restore fudged end_subject */
4612
4613   end_subject = save_end_subject;
4614
4615 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4616   printf(">>>> Match against: ");
4617   pchars(start_match, end_subject - start_match, TRUE, md);
4618   printf("\n");
4619 #endif
4620
4621   /* If req_byte is set, we know that that character must appear in the subject
4622   for the match to succeed. If the first character is set, req_byte must be
4623   later in the subject; otherwise the test starts at the match point. This
4624   optimization can save a huge amount of backtracking in patterns with nested
4625   unlimited repeats that aren't going to match. Writing separate code for
4626   cased/caseless versions makes it go faster, as does using an autoincrement
4627   and backing off on a match.
4628
4629   HOWEVER: when the subject string is very, very long, searching to its end can
4630   take a long time, and give bad performance on quite ordinary patterns. This
4631   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4632   string... so we don't do this when the string is sufficiently long.
4633
4634   ALSO: this processing is disabled when partial matching is requested.
4635   */
4636
4637   if (req_byte >= 0 &&
4638       end_subject - start_match < REQ_BYTE_MAX &&
4639       !md->partial)
4640     {
4641     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4642
4643     /* We don't need to repeat the search if we haven't yet reached the
4644     place we found it at last time. */
4645
4646     if (p > req_byte_ptr)
4647       {
4648       if (req_byte_caseless)
4649         {
4650         while (p < end_subject)
4651           {
4652           register int pp = *p++;
4653           if (pp == req_byte || pp == req_byte2) { p--; break; }
4654           }
4655         }
4656       else
4657         {
4658         while (p < end_subject)
4659           {
4660           if (*p++ == req_byte) { p--; break; }
4661           }
4662         }
4663
4664       /* If we can't find the required character, break the matching loop,
4665       forcing a match failure. */
4666
4667       if (p >= end_subject)
4668         {
4669         rc = MATCH_NOMATCH;
4670         break;
4671         }
4672
4673       /* If we have found the required character, save the point where we
4674       found it, so that we don't search again next time round the loop if
4675       the start hasn't passed this character yet. */
4676
4677       req_byte_ptr = p;
4678       }
4679     }
4680
4681   /* OK, we can now run the match. */
4682
4683   md->start_match_ptr = start_match;      /* Insurance */
4684   md->match_call_count = 0;
4685   md->eptrn = 0;                          /* Next free eptrchain slot */
4686   rc = match(start_match, md->start_code, start_match, 2, md,
4687     ims, NULL, 0, 0);
4688
4689   /* Any return other than MATCH_NOMATCH breaks the loop. */
4690
4691   if (rc != MATCH_NOMATCH) break;
4692
4693   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4694   newline in the subject (though it may continue over the newline). Therefore,
4695   if we have just failed to match, starting at a newline, do not continue. */
4696
4697   if (firstline && IS_NEWLINE(start_match)) break;
4698
4699   /* Advance the match position by one character. */
4700
4701   start_match++;
4702 #ifdef SUPPORT_UTF8
4703   if (utf8)
4704     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4705       start_match++;
4706 #endif
4707
4708   /* Break the loop if the pattern is anchored or if we have passed the end of
4709   the subject. */
4710
4711   if (anchored || start_match > end_subject) break;
4712
4713   /* If we have just passed a CR and the newline option is CRLF or ANY or
4714   ANYCRLF, and we are now at a LF, advance the match position by one more
4715   character. */
4716
4717   if (start_match[-1] == '\r' &&
4718        (md->nltype == NLTYPE_ANY ||
4719         md->nltype == NLTYPE_ANYCRLF ||
4720         md->nllen == 2) &&
4721        start_match < end_subject &&
4722        *start_match == '\n')
4723     start_match++;
4724
4725   }   /* End of for(;;) "bumpalong" loop */
4726
4727 /* ==========================================================================*/
4728
4729 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4730 conditions is true:
4731
4732 (1) The pattern is anchored;
4733
4734 (2) We are past the end of the subject;
4735
4736 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4737     this option requests that a match occur at or before the first newline in
4738     the subject.
4739
4740 When we have a match and the offset vector is big enough to deal with any
4741 backreferences, captured substring offsets will already be set up. In the case
4742 where we had to get some local store to hold offsets for backreference
4743 processing, copy those that we can. In this case there need not be overflow if
4744 certain parts of the pattern were not used, even though there are more
4745 capturing parentheses than vector slots. */
4746
4747 if (rc == MATCH_MATCH)
4748   {
4749   if (using_temporary_offsets)
4750     {
4751     if (offsetcount >= 4)
4752       {
4753       memcpy(offsets + 2, md->offset_vector + 2,
4754         (offsetcount - 2) * sizeof(int));
4755       DPRINTF(("Copied offsets from temporary memory\n"));
4756       }
4757     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4758     DPRINTF(("Freeing temporary memory\n"));
4759     (pcre_free)(md->offset_vector);
4760     }
4761
4762   /* Set the return code to the number of captured strings, or 0 if there are
4763   too many to fit into the vector. */
4764
4765   rc = md->offset_overflow? 0 : md->end_offset_top/2;
4766
4767   /* If there is space, set up the whole thing as substring 0. The value of
4768   md->start_match_ptr might be modified if \K was encountered on the success
4769   matching path. */
4770
4771   if (offsetcount < 2) rc = 0; else
4772     {
4773     offsets[0] = md->start_match_ptr - md->start_subject;
4774     offsets[1] = md->end_match_ptr - md->start_subject;
4775     }
4776
4777   DPRINTF((">>>> returning %d\n", rc));
4778   return rc;
4779   }
4780
4781 /* Control gets here if there has been an error, or if the overall match
4782 attempt has failed at all permitted starting positions. */
4783
4784 if (using_temporary_offsets)
4785   {
4786   DPRINTF(("Freeing temporary memory\n"));
4787   (pcre_free)(md->offset_vector);
4788   }
4789
4790 if (rc != MATCH_NOMATCH)
4791   {
4792   DPRINTF((">>>> error: returning %d\n", rc));
4793   return rc;
4794   }
4795 else if (md->partial && md->hitend)
4796   {
4797   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4798   return PCRE_ERROR_PARTIAL;
4799   }
4800 else
4801   {
4802   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4803   return PCRE_ERROR_NOMATCH;
4804   }
4805 }
4806
4807 /* End of pcre_exec.c */