resource/csdk/connectivity/lib/android/glib-master/glib/pcre/pcre_exec.c

   1 /*************************************************
   2 *      Perl-Compatible Regular Expressions       *
   3 *************************************************/
   4
   5 /* PCRE is a library of functions to support regular expressions whose syntax
   6 and semantics are as close as possible to those of the Perl 5 language.
   7
   8                        Written by Philip Hazel
   9            Copyright (c) 1997-2010 University of Cambridge
  10
  11 -----------------------------------------------------------------------------
  12 Redistribution and use in source and binary forms, with or without
  13 modification, are permitted provided that the following conditions are met:
  14
  15     * Redistributions of source code must retain the above copyright notice,
  16       this list of conditions and the following disclaimer.
  17
  18     * Redistributions in binary form must reproduce the above copyright
  19       notice, this list of conditions and the following disclaimer in the
  20       documentation and/or other materials provided with the distribution.
  21
  22     * Neither the name of the University of Cambridge nor the names of its
  23       contributors may be used to endorse or promote products derived from
  24       this software without specific prior written permission.
  25
  26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36 POSSIBILITY OF SUCH DAMAGE.
  37 -----------------------------------------------------------------------------
  38 */
  39
  40
  41 /* This module contains pcre_exec(), the externally visible function that does
  42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  43 possible. There are also some static supporting functions. */
  44
  45 #ifdef HAVE_CONFIG_H
  46 #include "config.h"
  47 #endif
  48
  49 #define NLBLOCK md             /* Block containing newline information */
  50 #define PSSTART start_subject  /* Field containing processed string start */
  51 #define PSEND   end_subject    /* Field containing processed string end */
  52
  53 #include "pcre_internal.h"
  54
  55 /* Undefine some potentially clashing cpp symbols */
  56
  57 #undef min
  58 #undef max
  59
  60 /* Flag bits for the match() function */
  61
  62 #define match_condassert     0x01  /* Called to check a condition assertion */
  63 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
  64
  65 /* Non-error returns from the match() function. Error returns are externally
  66 defined PCRE_ERROR_xxx codes, which are all negative. */
  67
  68 #define MATCH_MATCH        1       /* match() matched */
  69 #define MATCH_NOMATCH      0       /* match() failed to match */
  70
  71 /* Special internal returns from the match() function. Make them sufficiently
  72 negative to avoid the external error codes. */
  73 /* Each is returned by its opcode's case in match() when the code after it gives MATCH_NOMATCH. */
  74 #define MATCH_COMMIT       (-999)  /* from the OP_COMMIT case */
  75 #define MATCH_PRUNE        (-998)  /* from the OP_PRUNE case */
  76 #define MATCH_SKIP         (-997)  /* from the OP_SKIP case, after md->start_match_ptr = eptr */
  77 #define MATCH_THEN         (-996)  /* from the OP_THEN case; bracket loops treat it like NOMATCH */
  78
  79 /* Maximum number of ints of offset to save on the stack for recursive calls.
  80 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
  81 because the offset vector is always a multiple of 3 long. */
  82
  83 #define REC_STACK_SAVE_MAX 30      /* dimension of stacksave[] / Xstacksave[] */
  84
  85 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
  86 /* NOTE(review): entries look like *, *?, +, +?, ?, ?? in order; the indexing code is not in view - confirm. */
  87 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };   /* minimum repeat count */
  88 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };   /* maximum repeat count, 0 = unlimited */
  89
  90
  91
  92 #ifdef PCRE_DEBUG
  93 /*************************************************
  94 *      Debug helper: print chars readably        *
  95 *************************************************/
  96
  97 /* Writes up to "length" bytes starting at p to stdout. A byte for which
  98 isprint() is true is written as itself, any other byte as a \xhh escape. When
  99 is_subject is TRUE the count is first capped so output stops at md->end_subject.
 100 Arguments:
 101   p           first byte to write
 102   length      maximum number of bytes to write
 103   is_subject  TRUE if p points into the subject described by md
 104   md          matching data block; only examined when is_subject is TRUE
 105
 106 Returns:     nothing
 107 */
 108
 109 static void
 110 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
 111 {
 112 unsigned int ch;   /* the byte currently being written */
 113 if (is_subject && md->end_subject - p < length) length = md->end_subject - p;
 114 for (; length > 0; length--)
 115   { ch = *p++; if (isprint(ch)) printf("%c", ch); else printf("\\x%02x", ch); }
 116 }
 117 #endif
 118
 119
 120
 121 /*************************************************
 122 *     Compare subject with a back reference      *
 123 *************************************************/
 124
 125 /* Tests whether the "length" bytes at eptr match the text that starts at
 126 md->offset_vector[offset] in the subject. For a back reference that hasn't been
 127 set, the length passed exceeds what is left of the subject, so the test fails.
 128 Arguments:
 129   offset      index into the offset vector of the referenced text's start
 130   eptr        current position in the subject
 131   length      number of bytes to compare
 132   md          points to match data block
 133   ims         the ims flags; only PCRE_CASELESS is looked at here
 134
 135 Returns:      TRUE if matched, FALSE otherwise
 136 */
 137
 138 static BOOL
 139 match_ref(int offset, register USPTR eptr, int length, match_data *md,
 140   unsigned long int ims)
 141 {
 142 USPTR ref = md->start_subject + md->offset_vector[offset];
 143
 144 #ifdef PCRE_DEBUG
 145 if (eptr < md->end_subject)
 146   {
 147   printf("matching subject ");
 148   pchars(eptr, length, TRUE, md);
 149   }
 150 else
 151   printf("matching subject <null>");
 152 printf(" against backref ");
 153 pchars(ref, length, FALSE, md);
 154 printf("\n");
 155 #endif
 156
 157 /* A reference longer than what is left of the subject can never match. */
 158
 159 if (md->end_subject - eptr < length) return FALSE;
 160
 161 /* Caseful comparison: the bytes must simply be equal, and that holds whether
 162 or not we are in UTF-8 mode. */
 163
 164 if ((ims & PCRE_CASELESS) == 0)
 165   {
 166   for (; length > 0; length--)
 167     { if (*ref++ != *eptr++) return FALSE; }
 168   return TRUE;
 169   }
 170
 171 /* Caseless from here on. In UTF-8 mode with Unicode property support, compare
 172 character by character, accepting the same character or UCD_OTHERCASE() of the
 173 referenced one. */
 174
 175 #ifdef SUPPORT_UTF8
 176 #ifdef SUPPORT_UCP
 177 if (md->utf8)
 178   {
 179   USPTR stop = eptr + length;
 180   while (eptr < stop)
 181     {
 182     int subj_ch, ref_ch;
 183     GETCHARINC(subj_ch, eptr);
 184     GETCHARINC(ref_ch, ref);
 185     if (subj_ch != ref_ch && subj_ch != UCD_OTHERCASE(ref_ch)) return FALSE;
 186     }
 187   return TRUE;
 188   }
 189 #endif
 190 #endif
 191
 192 /* Not UTF-8, or UTF-8 without UCP support: compare byte by byte after mapping
 193 each byte through md->lcc. */
 194
 195 for (; length > 0; length--)
 196   { if (md->lcc[*ref++] != md->lcc[*eptr++]) return FALSE; }
 197 return TRUE;
 198 }
 199
 200
 201
 202 /***************************************************************************
 203 ****************************************************************************
 204                    RECURSION IN THE match() FUNCTION
 205
 206 The match() function is highly recursive, though not every recursive call
 207 increases the recursive depth. Nevertheless, some regular expressions can cause
 208 it to recurse to a great depth. I was writing for Unix, so I just let it call
 209 itself recursively. This uses the stack for saving everything that has to be
 210 saved for a recursive call. On Unix, the stack can be large, and this works
 211 fine.
 212
 213 It turns out that on some non-Unix-like systems there are problems with
 214 programs that use a lot of stack. (This despite the fact that every last chip
 215 has oodles of memory these days, and techniques for extending the stack have
 216 been known for decades.) So....
 217
 218 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 219 calls by keeping local variables that need to be preserved in blocks of memory
 220 obtained from malloc() instead of on the stack. Macros are used to
 221 achieve this so that the actual code doesn't look very different to what it
 222 always used to.
 223
 224 The original heap-recursive code used longjmp(). However, it seems that this
 225 can be very slow on some operating systems. Following a suggestion from Stan
 226 Switzer, the use of longjmp() has been abolished, at the cost of having to
 227 provide a unique number for each call to RMATCH. There is no way of generating
 228 a sequence of numbers at compile time in C. I have given them names, to make
 229 them stand out more clearly.
 230
 231 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
 232 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
 233 tests. Furthermore, not using longjmp() means that local dynamic variables
 234 don't have indeterminate values; this has meant that the frame size can be
 235 reduced because the result can be "passed back" by straight setting of the
 236 variable instead of being passed in the frame.
 237 ****************************************************************************
 238 ***************************************************************************/
 239
 240 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
 241 below must be updated in sync.  */
 242 /* With NO_RECURSE, RMATCH stores its number in frame->Xwhere and defines the resume label L_RMn. */
 243 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
 244        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
 245        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
 246        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
 247        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
 248        RM51,  RM52, RM53, RM54 };   /* numbering starts at 1 */
 249
 250 /* These versions of the macros use the stack, as normal: RMATCH is a real
 251 recursive call of match() that leaves its result in rrc, and RRETURN is a plain
 252 return. There are debugging versions (which also trace with printf) and production versions. "rw" is unused here. */
 253 /* ra..rg become eptr, ecode, offset_top, md, ims, eptrb, flags; mstart, markptr, rdepth come from the caller. */
 254 #ifndef NO_RECURSE
 255 #define REGISTER register
 256
 257 #ifdef PCRE_DEBUG
 258 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 259   { \
 260   printf("match() called in line %d\n", __LINE__); \
 261   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1);  /* one level deeper */ \
 262   printf("to line %d\n", __LINE__); \
 263   }
 264 #define RRETURN(ra) \
 265   { \
 266   printf("match() returned %d from line %d ", ra, __LINE__);  /* ra is expanded twice in this version */ \
 267   return ra; \
 268   }
 269 #else
 270 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 271   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
 272 #define RRETURN(ra) return ra
 273 #endif
 274
 275 #else
 276
 277
 278 /* These versions of the macros manage a private stack on the heap. Note that
 279 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
 280 argument of match(), which never changes. */
 281
 282 #define REGISTER
 283
 284 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
 285   {\
 286   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
 287   frame->Xwhere = rw; \
 288   newframe->Xeptr = ra;\
 289   newframe->Xecode = rb;\
 290   newframe->Xmstart = mstart;\
 291   newframe->Xmarkptr = markptr;\
 292   newframe->Xoffset_top = rc;\
 293   newframe->Xims = re;\
 294   newframe->Xeptrb = rf;\
 295   newframe->Xflags = rg;\
 296   newframe->Xrdepth = frame->Xrdepth + 1;\
 297   newframe->Xprevframe = frame;\
 298   frame = newframe;\
 299   DPRINTF(("restarting from line %d\n", __LINE__));\
 300   goto HEAP_RECURSE;\
 301   L_##rw:\
 302   DPRINTF(("jumped back to line %d\n", __LINE__));\
 303   }
 304 /* Pops and frees the current frame; resumes the parent at HEAP_RETURN if there is one, else really returns. */
 305 #define RRETURN(ra)\
 306   {\
 307   heapframe *newframe = frame;\
 308   frame = newframe->Xprevframe;   /* parent level, or NULL when this was the top frame */\
 309   (pcre_stack_free)(newframe);\
 310   if (frame != NULL)\
 311     {\
 312     rrc = ra;   /* NOTE(review): evaluated after the pop - a frame-backed name in ra would read the parent's frame */\
 313     goto HEAP_RETURN;\
 314     }\
 315   return ra;\
 316   }
 317
 318
 319 /* Structure for remembering the local variables in a private frame */
 320 /* One frame per match() level under NO_RECURSE; match() reaches field Xname through a macro called "name". */
 321 typedef struct heapframe {
 322   struct heapframe *Xprevframe;   /* frame of the calling level; NULL marks the top level */
 323
 324   /* Function arguments that may change */
 325
 326   USPTR Xeptr;                    /* current character in the subject */
 327   const uschar *Xecode;           /* current position in the compiled code */
 328   USPTR Xmstart;                  /* current match start position */
 329   USPTR Xmarkptr;                 /* most recent MARK name, or NULL */
 330   int Xoffset_top;                /* current top pointer */
 331   unsigned long int Xims;         /* current /i, /m, /s options; typed like match()'s ims argument */
 332   eptrblock *Xeptrb;              /* chain of eptr values saved at bracket starts */
 333   int Xflags;                     /* match_condassert / match_cbegroup bits */
 334   unsigned int Xrdepth;           /* recursion depth */
 335
 336   /* Function local variables */
 337
 338   USPTR Xcallpat;
 339 #ifdef SUPPORT_UTF8
 340   USPTR Xcharptr;
 341 #endif
 342   USPTR Xdata;
 343   USPTR Xnext;
 344   USPTR Xpp;
 345   USPTR Xprev;
 346   USPTR Xsaved_eptr;
 347
 348   recursion_info Xnew_recursive;
 349
 350   BOOL Xcur_is_word;
 351   BOOL Xcondition;
 352   BOOL Xprev_is_word;
 353
 354   unsigned long int Xoriginal_ims;   /* ims as it was when this level started */
 355
 356 #ifdef SUPPORT_UCP
 357   int Xprop_type;
 358   int Xprop_value;
 359   int Xprop_fail_result;
 360   int Xprop_category;
 361   int Xprop_chartype;
 362   int Xprop_script;
 363   int Xoclength;
 364   uschar Xocchars[8];
 365 #endif
 366
 367   int Xcodelink;
 368   int Xctype;
 369   unsigned int Xfc;               /* a field of its own here; just an alias of "c" in the recursive build */
 370   int Xfi;                        /* a field of its own here; just an alias of "i" in the recursive build */
 371   int Xlength;
 372   int Xmax;
 373   int Xmin;
 374   int Xnumber;
 375   int Xoffset;
 376   int Xop;                        /* opcode being processed */
 377   int Xsave_capture_last;
 378   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
 379   int Xstacksave[REC_STACK_SAVE_MAX];
 380
 381   eptrblock Xnewptrb;             /* this level's eptrb chain entry, filled in when match_cbegroup is set */
 382
 383   /* Where to jump back to */
 384
 385   int Xwhere;                     /* RMn number of the RMATCH in progress from this frame */
 386
 387 } heapframe;
 388
 389 #endif
 390
 391
 392 /***************************************************************************
 393 ***************************************************************************/
 394
 395
 396
 397 /*************************************************
 398 *         Match from current position            *
 399 *************************************************/
 400
 401 /* This function is called recursively in many circumstances. Whenever it
 402 returns a negative (error) response, the outer incarnation must also return the
 403 same response. */
 404
 405 /* These macros pack up tests that are used for partial matching, and which
 406 appear several times in the code. We set the "hit end" flag (md->hitend) if the
 407 pointer is at the end of the subject and also past the current match start (i.e.
 408 something has been matched). For hard partial matching (md->partial > 1), we
 409 then return PCRE_ERROR_PARTIAL immediately. The second one is used when we
 410 already know we are past the end of the subject. */
 411 /* NOTE: both expand to a bare "if" statement, so neither should be followed directly by an "else". */
 412 #define CHECK_PARTIAL()\
 413   if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
 414     {\
 415     md->hitend = TRUE;\
 416     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
 417     }
 418
 419 #define SCHECK_PARTIAL()\
 420   if (md->partial != 0 && eptr > mstart)   /* no end-of-subject test in this variant */\
 421     {\
 422     md->hitend = TRUE;\
 423     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
 424     }
 425
 426
 427 /* Performance note: It might be tempting to extract commonly used fields from
 428 the md structure (e.g. utf8, end_subject) into individual variables to improve
 429 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 430 made performance worse.
 431
 432 Arguments:
 433    eptr        pointer to current character in subject
 434    ecode       pointer to current position in compiled code
 435    mstart      pointer to the current match start position (can be modified
 436                  by encountering \K)
 437    markptr     pointer to the most recent MARK name, or NULL
 438    offset_top  current top pointer
 439    md          pointer to "static" info for the match
 440    ims         current /i, /m, and /s options
 441    eptrb       pointer to chain of blocks containing eptr at start of
 442                  brackets - for testing for empty matches
 443    flags       can contain
 444                  match_condassert - this is an assertion condition
 445                  match_cbegroup - this is the start of an unlimited repeat
 446                    group that can match an empty string
 447    rdepth      the recursion depth
 448
 449 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 450                MATCH_NOMATCH if failed to match  )
 451                a negative PCRE_ERROR_xxx value if aborted by an error condition
 452                  (e.g. stopped by repeated call or recursion limit)
 453 */
 454
 455 static int
 456 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, USPTR
 457   markptr, int offset_top, match_data *md, unsigned long int ims,
 458   eptrblock *eptrb, int flags, unsigned int rdepth)
 459 {
 460 /* These variables do not need to be preserved over recursion in this function,
 461 so they can be ordinary variables in all cases. Mark some of them with
 462 "register" because they are used a lot in loops. */
 463
 464 register int  rrc;         /* Returns from recursive calls */
 465 register int  i;           /* Used for loops not involving calls to RMATCH() */
 466 register unsigned int c;   /* Character values not kept over RMATCH() calls */
 467 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
 468
 469 BOOL minimize, possessive; /* Quantifier options */
 470 int condcode;
 471
 472 /* When recursion is not being used, all "local" variables that have to be
 473 preserved over calls to RMATCH() are part of a "frame" which is obtained from
 474 heap storage. Set up the top-level frame here; others are obtained from the
 475 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
 476
 477 #ifdef NO_RECURSE
 478 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
 479 frame->Xprevframe = NULL;            /* Marks the top level */
 480
 481 /* Copy in the original argument variables */
 482
 483 frame->Xeptr = eptr;
 484 frame->Xecode = ecode;
 485 frame->Xmstart = mstart;
 486 frame->Xmarkptr = markptr;
 487 frame->Xoffset_top = offset_top;
 488 frame->Xims = ims;
 489 frame->Xeptrb = eptrb;
 490 frame->Xflags = flags;
 491 frame->Xrdepth = rdepth;
 492
 493 /* This is where control jumps back to to effect "recursion" */
 494
 495 HEAP_RECURSE:
 496
 497 /* Macros make the argument variables come from the current frame */
 498
 499 #define eptr               frame->Xeptr
 500 #define ecode              frame->Xecode
 501 #define mstart             frame->Xmstart
 502 #define markptr            frame->Xmarkptr
 503 #define offset_top         frame->Xoffset_top
 504 #define ims                frame->Xims
 505 #define eptrb              frame->Xeptrb
 506 #define flags              frame->Xflags
 507 #define rdepth             frame->Xrdepth
 508
 509 /* Ditto for the local variables */
 510
 511 #ifdef SUPPORT_UTF8
 512 #define charptr            frame->Xcharptr
 513 #endif
 514 #define callpat            frame->Xcallpat
 515 #define codelink           frame->Xcodelink
 516 #define data               frame->Xdata
 517 #define next               frame->Xnext
 518 #define pp                 frame->Xpp
 519 #define prev               frame->Xprev
 520 #define saved_eptr         frame->Xsaved_eptr
 521
 522 #define new_recursive      frame->Xnew_recursive
 523
 524 #define cur_is_word        frame->Xcur_is_word
 525 #define condition          frame->Xcondition
 526 #define prev_is_word       frame->Xprev_is_word
 527
 528 #define original_ims       frame->Xoriginal_ims
 529
 530 #ifdef SUPPORT_UCP
 531 #define prop_type          frame->Xprop_type
 532 #define prop_value         frame->Xprop_value
 533 #define prop_fail_result   frame->Xprop_fail_result
 534 #define prop_category      frame->Xprop_category
 535 #define prop_chartype      frame->Xprop_chartype
 536 #define prop_script        frame->Xprop_script
 537 #define oclength           frame->Xoclength
 538 #define occhars            frame->Xocchars
 539 #endif
 540
 541 #define ctype              frame->Xctype
 542 #define fc                 frame->Xfc
 543 #define fi                 frame->Xfi
 544 #define length             frame->Xlength
 545 #define max                frame->Xmax
 546 #define min                frame->Xmin
 547 #define number             frame->Xnumber
 548 #define offset             frame->Xoffset
 549 #define op                 frame->Xop
 550 #define save_capture_last  frame->Xsave_capture_last
 551 #define save_offset1       frame->Xsave_offset1
 552 #define save_offset2       frame->Xsave_offset2
 553 #define save_offset3       frame->Xsave_offset3
 554 #define stacksave          frame->Xstacksave
 555
 556 #define newptrb            frame->Xnewptrb
 557
 558 /* When recursion is being used, local variables are allocated on the stack and
 559 get preserved during recursion in the normal way. In this environment, fi and
 560 i, and fc and c, can be the same variables. */
 561
 562 #else         /* NO_RECURSE not defined */
 563 #define fi i
 564 #define fc c
 565
 566
 567 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
 568 const uschar *charptr;             /* in small blocks of the code. My normal */
 569 #endif                             /* style of coding would have declared    */
 570 const uschar *callpat;             /* them within each of those blocks.      */
 571 const uschar *data;                /* However, in order to accommodate the   */
 572 const uschar *next;                /* version of this code that uses an      */
 573 USPTR         pp;                  /* external "stack" implemented on the    */
 574 const uschar *prev;                /* heap, it is easier to declare them all */
 575 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
 576                                    /* out in a block. The only declarations  */
 577 recursion_info new_recursive;      /* within blocks below are for variables  */
 578                                    /* that do not have to be preserved over  */
 579 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
 580 BOOL condition;
 581 BOOL prev_is_word;
 582
 583 unsigned long int original_ims;
 584
 585 #ifdef SUPPORT_UCP
 586 int prop_type;
 587 int prop_value;
 588 int prop_fail_result;
 589 int prop_category;
 590 int prop_chartype;
 591 int prop_script;
 592 int oclength;
 593 uschar occhars[8];
 594 #endif
 595
 596 int codelink;
 597 int ctype;
 598 int length;
 599 int max;
 600 int min;
 601 int number;
 602 int offset;
 603 int op;
 604 int save_capture_last;
 605 int save_offset1, save_offset2, save_offset3;
 606 int stacksave[REC_STACK_SAVE_MAX];
 607
 608 eptrblock newptrb;
 609 #endif     /* NO_RECURSE */
 610
 611 /* These statements are here to stop the compiler complaining about unitialized
 612 variables. */
 613
 614 #ifdef SUPPORT_UCP
 615 prop_value = 0;
 616 prop_fail_result = 0;
 617 #endif
 618
 619
 620 /* This label is used for tail recursion, which is used in a few cases even
 621 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 622 used. Thanks to Ian Taylor for noticing this possibility and sending the
 623 original patch. */
 624
 625 TAIL_RECURSE:
 626
 627 /* OK, now we can get on with the real code of the function. Recursive calls
 628 are specified by the macro RMATCH and RRETURN is used to return. When
 629 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 630 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
 631 defined). However, RMATCH isn't like a function call because it's quite a
 632 complicated macro. It has to be used in one particular way. This shouldn't,
 633 however, impact performance when true recursion is being used. */
 634
 635 #ifdef SUPPORT_UTF8
 636 utf8 = md->utf8;       /* Local copy of the flag */
 637 #else
 638 utf8 = FALSE;
 639 #endif
 640
 641 /* First check that we haven't called match() too many times, or that we
 642 haven't exceeded the recursive call limit. */
 643
 644 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 645 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 646
 647 original_ims = ims;    /* Save for resetting on ')' */
 648
 649 /* At the start of a group with an unlimited repeat that may match an empty
 650 string, the match_cbegroup flag is set. When this is the case, add the current
 651 subject pointer to the chain of such remembered pointers, to be checked when we
 652 hit the closing ket, in order to break infinite loops that match no characters.
 653 When match() is called in other circumstances, don't add to the chain. The
 654 match_cbegroup flag must NOT be used with tail recursion, because the memory
 655 block that is used is on the stack, so a new one may be required for each
 656 match(). */
 657
 658 if ((flags & match_cbegroup) != 0)
 659   {
 660   newptrb.epb_saved_eptr = eptr;
 661   newptrb.epb_prev = eptrb;
 662   eptrb = &newptrb;
 663   }
 664
 665 /* Now start processing the opcodes. */
 666
 667 for (;;)
 668   {
 669   minimize = possessive = FALSE;
 670   op = *ecode;
 671
 672   switch(op)
 673     {
 674     case OP_FAIL:
 675     RRETURN(MATCH_NOMATCH);
 676
 677     case OP_PRUNE:
 678     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 679       ims, eptrb, flags, RM51);
 680     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 681     RRETURN(MATCH_PRUNE);
 682
 683     case OP_COMMIT:
 684     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 685       ims, eptrb, flags, RM52);
 686     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 687     RRETURN(MATCH_COMMIT);
 688
 689     case OP_SKIP:
 690     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 691       ims, eptrb, flags, RM53);
 692     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 693     md->start_match_ptr = eptr;   /* Pass back current position */
 694     RRETURN(MATCH_SKIP);
 695
 696     case OP_THEN:
 697     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 698       ims, eptrb, flags, RM54);
 699     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 700     RRETURN(MATCH_THEN);
 701
 702     /* Handle a capturing bracket. If there is space in the offset vector, save
 703     the current subject position in the working slot at the top of the vector.
 704     We mustn't change the current values of the data slot, because they may be
 705     set from a previous iteration of this group, and be referred to by a
 706     reference inside the group.
 707
 708     If the bracket fails to match, we need to restore this value and also the
 709     values of the final offsets, in case they were set by a previous iteration
 710     of the same bracket.
 711
 712     If there isn't enough space in the offset vector, treat this as if it were
 713     a non-capturing bracket. Don't worry about setting the flag for the error
 714     case here; that is handled in the code for KET. */
 715
 716     case OP_CBRA:
 717     case OP_SCBRA:
 718     number = GET2(ecode, 1+LINK_SIZE);
 719     offset = number << 1;
 720
 721 #ifdef PCRE_DEBUG
 722     printf("start bracket %d\n", number);
 723     printf("subject=");
 724     pchars(eptr, 16, TRUE, md);
 725     printf("\n");
 726 #endif
 727
 728     if (offset < md->offset_max)
 729       {
 730       save_offset1 = md->offset_vector[offset];
 731       save_offset2 = md->offset_vector[offset+1];
 732       save_offset3 = md->offset_vector[md->offset_end - number];
 733       save_capture_last = md->capture_last;
 734
 735       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 736       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
 737
 738       flags = (op == OP_SCBRA)? match_cbegroup : 0;
 739       do
 740         {
 741         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 742           ims, eptrb, flags, RM1);
 743         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 744         md->capture_last = save_capture_last;
 745         ecode += GET(ecode, 1);
 746         }
 747       while (*ecode == OP_ALT);
 748
 749       DPRINTF(("bracket %d failed\n", number));
 750
 751       md->offset_vector[offset] = save_offset1;
 752       md->offset_vector[offset+1] = save_offset2;
 753       md->offset_vector[md->offset_end - number] = save_offset3;
 754
 755       RRETURN(MATCH_NOMATCH);
 756       }
 757
 758     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
 759     as a non-capturing bracket. */
 760
 761     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 762     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 763
 764     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
 765
 766     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 767     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 768
 769     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
 770     final alternative within the brackets, we would return the result of a
 771     recursive call to match() whatever happened. We can reduce stack usage by
 772     turning this into a tail recursion, except in the case when match_cbegroup
 773     is set.*/
 774
 775     case OP_BRA:
 776     case OP_SBRA:
 777     DPRINTF(("start non-capturing bracket\n"));
 778     flags = (op >= OP_SBRA)? match_cbegroup : 0;
 779     for (;;)
 780       {
 781       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
 782         {
 783         if (flags == 0)    /* Not a possibly empty group */
 784           {
 785           ecode += _pcre_OP_lengths[*ecode];
 786           DPRINTF(("bracket 0 tail recursion\n"));
 787           goto TAIL_RECURSE;
 788           }
 789
 790         /* Possibly empty group; can't use tail recursion. */
 791
 792         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 793           eptrb, flags, RM48);
 794         RRETURN(rrc);
 795         }
 796
 797       /* For non-final alternatives, continue the loop for a NOMATCH result;
 798       otherwise return. */
 799
 800       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 801         eptrb, flags, RM2);
 802       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 803       ecode += GET(ecode, 1);
 804       }
 805     /* Control never reaches here. */
 806
 807     /* Conditional group: compilation checked that there are no more than
 808     two branches. If the condition is false, skipping the first branch takes us
 809     past the end if there is only one branch, but that's OK because that is
 810     exactly what going to the ket would do. As there is only one branch to be
 811     obeyed, we can use tail recursion to avoid using another stack frame. */
 812
 813     case OP_COND:
 814     case OP_SCOND:
 815     codelink= GET(ecode, 1);
 816
 817     /* Because of the way auto-callout works during compile, a callout item is
 818     inserted between OP_COND and an assertion condition. */
 819
 820     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
 821       {
 822       if (pcre_callout != NULL)
 823         {
 824         pcre_callout_block cb;
 825         cb.version          = 1;   /* Version 1 of the callout block */
 826         cb.callout_number   = ecode[LINK_SIZE+2];
 827         cb.offset_vector    = md->offset_vector;
 828         cb.subject          = (PCRE_SPTR)md->start_subject;
 829         cb.subject_length   = md->end_subject - md->start_subject;
 830         cb.start_match      = mstart - md->start_subject;
 831         cb.current_position = eptr - md->start_subject;
 832         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
 833         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
 834         cb.capture_top      = offset_top/2;
 835         cb.capture_last     = md->capture_last;
 836         cb.callout_data     = md->callout_data;
 837         if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
 838         if (rrc < 0) RRETURN(rrc);
 839         }
 840       ecode += _pcre_OP_lengths[OP_CALLOUT];
 841       }
 842
 843     condcode = ecode[LINK_SIZE+1];
 844
 845     /* Now see what the actual condition is */
 846
 847     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
 848       {
 849       if (md->recursive == NULL)                /* Not recursing => FALSE */
 850         {
 851         condition = FALSE;
 852         ecode += GET(ecode, 1);
 853         }
 854       else
 855         {
 856         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
 857         condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
 858
 859         /* If the test is for recursion into a specific subpattern, and it is
 860         false, but the test was set up by name, scan the table to see if the
 861         name refers to any other numbers, and test them. The condition is true
 862         if any one is set. */
 863
 864         if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
 865           {
 866           uschar *slotA = md->name_table;
 867           for (i = 0; i < md->name_count; i++)
 868             {
 869             if (GET2(slotA, 0) == recno) break;
 870             slotA += md->name_entry_size;
 871             }
 872
 873           /* Found a name for the number - there can be only one; duplicate
 874           names for different numbers are allowed, but not vice versa. First
 875           scan down for duplicates. */
 876
 877           if (i < md->name_count)
 878             {
 879             uschar *slotB = slotA;
 880             while (slotB > md->name_table)
 881               {
 882               slotB -= md->name_entry_size;
 883               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
 884                 {
 885                 condition = GET2(slotB, 0) == md->recursive->group_num;
 886                 if (condition) break;
 887                 }
 888               else break;
 889               }
 890
 891             /* Scan up for duplicates */
 892
 893             if (!condition)
 894               {
 895               slotB = slotA;
 896               for (i++; i < md->name_count; i++)
 897                 {
 898                 slotB += md->name_entry_size;
 899                 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
 900                   {
 901                   condition = GET2(slotB, 0) == md->recursive->group_num;
 902                   if (condition) break;
 903                   }
 904                 else break;
 905                 }
 906               }
 907             }
 908           }
 909
 910         /* Choose the branch according to the condition */
 911
 912         ecode += condition? 3 : GET(ecode, 1);
 913         }
 914       }
 915
 916     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
 917       {
 918       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
 919       condition = offset < offset_top && md->offset_vector[offset] >= 0;
 920
 921       /* If the numbered capture is unset, but the reference was by name,
 922       scan the table to see if the name refers to any other numbers, and test
 923       them. The condition is true if any one is set. This is tediously similar
 924       to the code above, but not close enough to try to amalgamate. */
 925
 926       if (!condition && condcode == OP_NCREF)
 927         {
 928         int refno = offset >> 1;
 929         uschar *slotA = md->name_table;
 930
 931         for (i = 0; i < md->name_count; i++)
 932           {
 933           if (GET2(slotA, 0) == refno) break;
 934           slotA += md->name_entry_size;
 935           }
 936
 937         /* Found a name for the number - there can be only one; duplicate names
 938         for different numbers are allowed, but not vice versa. First scan down
 939         for duplicates. */
 940
 941         if (i < md->name_count)
 942           {
 943           uschar *slotB = slotA;
 944           while (slotB > md->name_table)
 945             {
 946             slotB -= md->name_entry_size;
 947             if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
 948               {
 949               offset = GET2(slotB, 0) << 1;
 950               condition = offset < offset_top &&
 951                 md->offset_vector[offset] >= 0;
 952               if (condition) break;
 953               }
 954             else break;
 955             }
 956
 957           /* Scan up for duplicates */
 958
 959           if (!condition)
 960             {
 961             slotB = slotA;
 962             for (i++; i < md->name_count; i++)
 963               {
 964               slotB += md->name_entry_size;
 965               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
 966                 {
 967                 offset = GET2(slotB, 0) << 1;
 968                 condition = offset < offset_top &&
 969                   md->offset_vector[offset] >= 0;
 970                 if (condition) break;
 971                 }
 972               else break;
 973               }
 974             }
 975           }
 976         }
 977
 978       /* Choose the branch according to the condition */
 979
 980       ecode += condition? 3 : GET(ecode, 1);
 981       }
 982
 983     else if (condcode == OP_DEF)     /* DEFINE - always false */
 984       {
 985       condition = FALSE;
 986       ecode += GET(ecode, 1);
 987       }
 988
 989     /* The condition is an assertion. Call match() to evaluate it - setting
 990     the final argument match_condassert causes it to stop at the end of an
 991     assertion. */
 992
 993     else
 994       {
 995       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
 996           match_condassert, RM3);
 997       if (rrc == MATCH_MATCH)
 998         {
 999         condition = TRUE;
1000         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1001         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1002         }
1003       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1004         {
1005         RRETURN(rrc);         /* Need braces because of following else */
1006         }
1007       else
1008         {
1009         condition = FALSE;
1010         ecode += codelink;
1011         }
1012       }
1013
1014     /* We are now at the branch that is to be obeyed. As there is only one,
1015     we can use tail recursion to avoid using another stack frame, except when
1016     match_cbegroup is required for an unlimited repeat of a possibly empty
1017     group. If the second alternative doesn't exist, we can just plough on. */
1018
1019     if (condition || *ecode == OP_ALT)
1020       {
1021       ecode += 1 + LINK_SIZE;
1022       if (op == OP_SCOND)        /* Possibly empty group */
1023         {
1024         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1025         RRETURN(rrc);
1026         }
1027       else                       /* Group must match something */
1028         {
1029         flags = 0;
1030         goto TAIL_RECURSE;
1031         }
1032       }
1033     else                         /* Condition false & no alternative */
1034       {
1035       ecode += 1 + LINK_SIZE;
1036       }
1037     break;
1038
1039
1040     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1041     to close any currently open capturing brackets. */
1042
1043     case OP_CLOSE:
1044     number = GET2(ecode, 1);
1045     offset = number << 1;
1046
1047 #ifdef PCRE_DEBUG
1048       printf("end bracket %d at *ACCEPT", number);
1049       printf("\n");
1050 #endif
1051
1052     md->capture_last = number;
1053     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1054       {
1055       md->offset_vector[offset] =
1056         md->offset_vector[md->offset_end - number];
1057       md->offset_vector[offset+1] = eptr - md->start_subject;
1058       if (offset_top <= offset) offset_top = offset + 2;
1059       }
1060     ecode += 3;
1061     break;
1062
1063
1064     /* End of the pattern, either real or forced. If we are in a top-level
1065     recursion, we should restore the offsets appropriately and continue from
1066     after the call. */
1067
1068     case OP_ACCEPT:
1069     case OP_END:
1070     if (md->recursive != NULL && md->recursive->group_num == 0)
1071       {
1072       recursion_info *rec = md->recursive;
1073       DPRINTF(("End of pattern in a (?0) recursion\n"));
1074       md->recursive = rec->prevrec;
1075       memmove(md->offset_vector, rec->offset_save,
1076         rec->saved_max * sizeof(int));
1077       offset_top = rec->save_offset_top;
1078       ims = original_ims;
1079       ecode = rec->after_call;
1080       break;
1081       }
1082
1083     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1084     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1085     the subject. In both cases, backtracking will then try other alternatives,
1086     if any. */
1087
1088     if (eptr == mstart &&
1089         (md->notempty ||
1090           (md->notempty_atstart &&
1091             mstart == md->start_subject + md->start_offset)))
1092       RRETURN(MATCH_NOMATCH);
1093
1094     /* Otherwise, we have a match. */
1095
1096     md->end_match_ptr = eptr;           /* Record where we ended */
1097     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1098     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1099     RRETURN(MATCH_MATCH);
1100
1101     /* Change option settings */
1102
1103     case OP_OPT:
1104     ims = ecode[1];
1105     ecode += 2;
1106     DPRINTF(("ims set to %02lx\n", ims));
1107     break;
1108
1109     /* Assertion brackets. Check the alternative branches in turn - the
1110     matching won't pass the KET for an assertion. If any one branch matches,
1111     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1112     start of each branch to move the current point backwards, so the code at
1113     this level is identical to the lookahead case. */
1114
1115     case OP_ASSERT:
1116     case OP_ASSERTBACK:
1117     do
1118       {
1119       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1120         RM4);
1121       if (rrc == MATCH_MATCH)
1122         {
1123         mstart = md->start_match_ptr;   /* In case \K reset it */
1124         break;
1125         }
1126       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1127       ecode += GET(ecode, 1);
1128       }
1129     while (*ecode == OP_ALT);
1130     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1131
1132     /* If checking an assertion for a condition, return MATCH_MATCH. */
1133
1134     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1135
1136     /* Continue from after the assertion, updating the offsets high water
1137     mark, since extracts may have been taken during the assertion. */
1138
1139     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1140     ecode += 1 + LINK_SIZE;
1141     offset_top = md->end_offset_top;
1142     continue;
1143
1144     /* Negative assertion: all branches must fail to match. Encountering SKIP,
1145     PRUNE, or COMMIT means we must assume failure without checking subsequent
1146     branches. */
1147
1148     case OP_ASSERT_NOT:
1149     case OP_ASSERTBACK_NOT:
1150     do
1151       {
1152       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1153         RM5);
1154       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1155       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1156         {
1157         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1158         break;
1159         }
1160       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1161       ecode += GET(ecode,1);
1162       }
1163     while (*ecode == OP_ALT);
1164
1165     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1166
1167     ecode += 1 + LINK_SIZE;
1168     continue;
1169
1170     /* Move the subject pointer back. This occurs only at the start of
1171     each branch of a lookbehind assertion. If we are too close to the start to
1172     move back, this match function fails. When working with UTF-8 we move
1173     back a number of characters, not bytes. */
1174
1175     case OP_REVERSE:
1176 #ifdef SUPPORT_UTF8
1177     if (utf8)
1178       {
1179       i = GET(ecode, 1);
1180       while (i-- > 0)
1181         {
1182         eptr--;
1183         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1184         BACKCHAR(eptr);
1185         }
1186       }
1187     else
1188 #endif
1189
1190     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1191
1192       {
1193       eptr -= GET(ecode, 1);
1194       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1195       }
1196
1197     /* Save the earliest consulted character, then skip to next op code */
1198
1199     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1200     ecode += 1 + LINK_SIZE;
1201     break;
1202
1203     /* The callout item calls an external function, if one is provided, passing
1204     details of the match so far. This is mainly for debugging, though the
1205     function is able to force a failure. */
1206
1207     case OP_CALLOUT:
1208     if (pcre_callout != NULL)
1209       {
1210       pcre_callout_block cb;
1211       cb.version          = 1;   /* Version 1 of the callout block */
1212       cb.callout_number   = ecode[1];
1213       cb.offset_vector    = md->offset_vector;
1214       cb.subject          = (PCRE_SPTR)md->start_subject;
1215       cb.subject_length   = md->end_subject - md->start_subject;
1216       cb.start_match      = mstart - md->start_subject;
1217       cb.current_position = eptr - md->start_subject;
1218       cb.pattern_position = GET(ecode, 2);
1219       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1220       cb.capture_top      = offset_top/2;
1221       cb.capture_last     = md->capture_last;
1222       cb.callout_data     = md->callout_data;
1223       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1224       if (rrc < 0) RRETURN(rrc);
1225       }
1226     ecode += 2 + 2*LINK_SIZE;
1227     break;
1228
1229     /* Recursion either matches the current regex, or some subexpression. The
1230     offset data is the offset to the starting bracket from the start of the
1231     whole pattern. (This is so that it works from duplicated subpatterns.)
1232
1233     If there are any capturing brackets started but not finished, we have to
1234     save their starting points and reinstate them after the recursion. However,
1235     we don't know how many such there are (offset_top records the completed
1236     total) so we just have to save all the potential data. There may be up to
1237     65535 such values, which is too large to put on the stack, but using malloc
1238     for small numbers seems expensive. As a compromise, the stack is used when
1239     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1240     is used. A problem is what to do if the malloc fails ... there is no way of
1241     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
1242     values on the stack, and accept that the rest may be wrong.
1243
1244     There are also other values that have to be saved. We use a chained
1245     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1246     for the original version of this logic. */
1247
1248     case OP_RECURSE:
1249       {
1250       callpat = md->start_code + GET(ecode, 1);
1251       new_recursive.group_num = (callpat == md->start_code)? 0 :
1252         GET2(callpat, 1 + LINK_SIZE);
1253
1254       /* Add to "recursing stack" */
1255
1256       new_recursive.prevrec = md->recursive;
1257       md->recursive = &new_recursive;
1258
1259       /* Find where to continue from afterwards */
1260
1261       ecode += 1 + LINK_SIZE;
1262       new_recursive.after_call = ecode;
1263
1264       /* Now save the offset data. */
1265
1266       new_recursive.saved_max = md->offset_end;
1267       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1268         new_recursive.offset_save = stacksave;
1269       else
1270         {
1271         new_recursive.offset_save =
1272           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1273         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1274         }
1275
1276       memcpy(new_recursive.offset_save, md->offset_vector,
1277             new_recursive.saved_max * sizeof(int));
1278       new_recursive.save_offset_top = offset_top;
1279
1280       /* OK, now we can do the recursion. For each top-level alternative we
1281       restore the offset and recursion data. */
1282
1283       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1284       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1285       do
1286         {
1287         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1288           md, ims, eptrb, flags, RM6);
1289         if (rrc == MATCH_MATCH)
1290           {
1291           DPRINTF(("Recursion matched\n"));
1292           md->recursive = new_recursive.prevrec;
1293           if (new_recursive.offset_save != stacksave)
1294             (pcre_free)(new_recursive.offset_save);
1295           RRETURN(MATCH_MATCH);
1296           }
1297         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1298           {
1299           DPRINTF(("Recursion gave error %d\n", rrc));
1300           if (new_recursive.offset_save != stacksave)
1301             (pcre_free)(new_recursive.offset_save);
1302           RRETURN(rrc);
1303           }
1304
1305         md->recursive = &new_recursive;
1306         memcpy(md->offset_vector, new_recursive.offset_save,
1307             new_recursive.saved_max * sizeof(int));
1308         callpat += GET(callpat, 1);
1309         }
1310       while (*callpat == OP_ALT);
1311
1312       DPRINTF(("Recursion didn't match\n"));
1313       md->recursive = new_recursive.prevrec;
1314       if (new_recursive.offset_save != stacksave)
1315         (pcre_free)(new_recursive.offset_save);
1316       RRETURN(MATCH_NOMATCH);
1317       }
1318     /* Control never reaches here */
1319
1320     /* "Once" brackets are like assertion brackets except that after a match,
1321     the point in the subject string is not moved back. Thus there can never be
1322     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1323     Check the alternative branches in turn - the matching won't pass the KET
1324     for this kind of subpattern. If any one branch matches, we carry on as at
1325     the end of a normal bracket, leaving the subject pointer, but resetting
1326     the start-of-match value in case it was changed by \K. */
1327
1328     case OP_ONCE:
1329     prev = ecode;
1330     saved_eptr = eptr;
1331
1332     do
1333       {
1334       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1335       if (rrc == MATCH_MATCH)
1336         {
1337         mstart = md->start_match_ptr;
1338         break;
1339         }
1340       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1341       ecode += GET(ecode,1);
1342       }
1343     while (*ecode == OP_ALT);
1344
1345     /* If we hit the end of the group (which could be repeated), fail */
1346
1347     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1348
1349     /* Continue as from after the assertion, updating the offsets high water
1350     mark, since extracts may have been taken. */
1351
1352     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1353
1354     offset_top = md->end_offset_top;
1355     eptr = md->end_match_ptr;
1356
1357     /* For a non-repeating ket, just continue at this level. This also
1358     happens for a repeating ket if no characters were matched in the group.
1359     This is the forcible breaking of infinite loops as implemented in Perl
1360     5.005. If there is an options reset, it will get obeyed in the normal
1361     course of events. */
1362
1363     if (*ecode == OP_KET || eptr == saved_eptr)
1364       {
1365       ecode += 1+LINK_SIZE;
1366       break;
1367       }
1368
1369     /* The repeating kets try the rest of the pattern or restart from the
1370     preceding bracket, in the appropriate order. The second "call" of match()
1371     uses tail recursion, to avoid using another stack frame. We need to reset
1372     any options that changed within the bracket before re-running it, so
1373     check the next opcode. */
1374
1375     if (ecode[1+LINK_SIZE] == OP_OPT)
1376       {
1377       ims = (ims & ~PCRE_IMS) | ecode[4];
1378       DPRINTF(("ims set to %02lx at group repeat\n", ims));
1379       }
1380
1381     if (*ecode == OP_KETRMIN)
1382       {
1383       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1384       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1385       ecode = prev;
1386       flags = 0;
1387       goto TAIL_RECURSE;
1388       }
1389     else  /* OP_KETRMAX */
1390       {
1391       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1392       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1393       ecode += 1 + LINK_SIZE;
1394       flags = 0;
1395       goto TAIL_RECURSE;
1396       }
1397     /* Control never gets here */
1398
1399     /* An alternation is the end of a branch; scan along to find the end of the
1400     bracketed group and go to there. */
1401
1402     case OP_ALT:
1403     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1404     break;
1405
1406     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1407     indicating that it may occur zero times. It may repeat infinitely, or not
1408     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1409     with fixed upper repeat limits are compiled as a number of copies, with the
1410     optional ones preceded by BRAZERO or BRAMINZERO. */
1411
1412     case OP_BRAZERO:
1413       {
1414       next = ecode+1;
1415       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1416       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1417       do next += GET(next,1); while (*next == OP_ALT);
1418       ecode = next + 1 + LINK_SIZE;
1419       }
1420     break;
1421
1422     case OP_BRAMINZERO:
1423       {
1424       next = ecode+1;
1425       do next += GET(next, 1); while (*next == OP_ALT);
1426       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1427       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1428       ecode++;
1429       }
1430     break;
1431
1432     case OP_SKIPZERO:
1433       {
1434       next = ecode+1;
1435       do next += GET(next,1); while (*next == OP_ALT);
1436       ecode = next + 1 + LINK_SIZE;
1437       }
1438     break;
1439
1440     /* End of a group, repeated or non-repeating. */
1441
1442     case OP_KET:
1443     case OP_KETRMIN:
1444     case OP_KETRMAX:
1445     prev = ecode - GET(ecode, 1);
1446
1447     /* If this was a group that remembered the subject start, in order to break
1448     infinite repeats of empty string matches, retrieve the subject start from
1449     the chain. Otherwise, set it NULL. */
1450
1451     if (*prev >= OP_SBRA)
1452       {
1453       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1454       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1455       }
1456     else saved_eptr = NULL;
1457
1458     /* If we are at the end of an assertion group or an atomic group, stop
1459     matching and return MATCH_MATCH, but record the current high water mark for
1460     use by positive assertions. We also need to record the match start in case
1461     it was changed by \K. */
1462
1463     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1464         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1465         *prev == OP_ONCE)
1466       {
1467       md->end_match_ptr = eptr;      /* For ONCE */
1468       md->end_offset_top = offset_top;
1469       md->start_match_ptr = mstart;
1470       RRETURN(MATCH_MATCH);
1471       }
1472
1473     /* For capturing groups we have to check the group number back at the start
1474     and if necessary complete handling an extraction by setting the offsets and
1475     bumping the high water mark. Note that whole-pattern recursion is coded as
1476     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1477     when the OP_END is reached. Other recursion is handled here. */
1478
1479     if (*prev == OP_CBRA || *prev == OP_SCBRA)
1480       {
1481       number = GET2(prev, 1+LINK_SIZE);
1482       offset = number << 1;
1483
1484 #ifdef PCRE_DEBUG
1485       printf("end bracket %d", number);
1486       printf("\n");
1487 #endif
1488
1489       md->capture_last = number;
1490       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1491         {
1492         md->offset_vector[offset] =
1493           md->offset_vector[md->offset_end - number];
1494         md->offset_vector[offset+1] = eptr - md->start_subject;
1495         if (offset_top <= offset) offset_top = offset + 2;
1496         }
1497
1498       /* Handle a recursively called group. Restore the offsets
1499       appropriately and continue from after the call. */
1500
1501       if (md->recursive != NULL && md->recursive->group_num == number)
1502         {
1503         recursion_info *rec = md->recursive;
1504         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1505         md->recursive = rec->prevrec;
1506         memcpy(md->offset_vector, rec->offset_save,
1507           rec->saved_max * sizeof(int));
1508         offset_top = rec->save_offset_top;
1509         ecode = rec->after_call;
1510         ims = original_ims;
1511         break;
1512         }
1513       }
1514
1515     /* For both capturing and non-capturing groups, reset the value of the ims
1516     flags, in case they got changed during the group. */
1517
1518     ims = original_ims;
1519     DPRINTF(("ims reset to %02lx\n", ims));
1520
1521     /* For a non-repeating ket, just continue at this level. This also
1522     happens for a repeating ket if no characters were matched in the group.
1523     This is the forcible breaking of infinite loops as implemented in Perl
1524     5.005. If there is an options reset, it will get obeyed in the normal
1525     course of events. */
1526
1527     if (*ecode == OP_KET || eptr == saved_eptr)
1528       {
1529       ecode += 1 + LINK_SIZE;
1530       break;
1531       }
1532
1533     /* The repeating kets try the rest of the pattern or restart from the
1534     preceding bracket, in the appropriate order. In the second case, we can use
1535     tail recursion to avoid using another stack frame, unless we have an
1536     unlimited repeat of a group that can match an empty string. */
1537
1538     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1539
1540     if (*ecode == OP_KETRMIN)
1541       {
1542       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1543       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1544       if (flags != 0)    /* Could match an empty string */
1545         {
1546         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1547         RRETURN(rrc);
1548         }
1549       ecode = prev;
1550       goto TAIL_RECURSE;
1551       }
1552     else  /* OP_KETRMAX */
1553       {
1554       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1555       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1556       ecode += 1 + LINK_SIZE;
1557       flags = 0;
1558       goto TAIL_RECURSE;
1559       }
1560     /* Control never gets here */
1561
1562     /* Start of subject unless notbol, or after internal newline if multiline */
1563
1564     case OP_CIRC:
1565     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1566     if ((ims & PCRE_MULTILINE) != 0)
1567       {
1568       if (eptr != md->start_subject &&
1569           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1570         RRETURN(MATCH_NOMATCH);
1571       ecode++;
1572       break;
1573       }
1574     /* ... else fall through */
1575
1576     /* Start of subject assertion */
1577
1578     case OP_SOD:
1579     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1580     ecode++;
1581     break;
1582
1583     /* Start of match assertion */
1584
1585     case OP_SOM:
1586     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1587     ecode++;
1588     break;
1589
1590     /* Reset the start of match point */
1591
1592     case OP_SET_SOM:
1593     mstart = eptr;
1594     ecode++;
1595     break;
1596
1597     /* Assert before internal newline if multiline, or before a terminating
1598     newline unless endonly is set, else end of subject unless noteol is set. */
1599
1600     case OP_DOLL:
1601     if ((ims & PCRE_MULTILINE) != 0)
1602       {
1603       if (eptr < md->end_subject)
1604         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1605       else
1606         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1607       ecode++;
1608       break;
1609       }
1610     else
1611       {
1612       if (md->noteol) RRETURN(MATCH_NOMATCH);
1613       if (!md->endonly)
1614         {
1615         if (eptr != md->end_subject &&
1616             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1617           RRETURN(MATCH_NOMATCH);
1618         ecode++;
1619         break;
1620         }
1621       }
1622     /* ... else fall through for endonly */
1623
1624     /* End of subject assertion (\z) */
1625
1626     case OP_EOD:
1627     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1628     ecode++;
1629     break;
1630
1631     /* End of subject or ending \n assertion (\Z) */
1632
1633     case OP_EODN:
1634     if (eptr != md->end_subject &&
1635         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1636       RRETURN(MATCH_NOMATCH);
1637     ecode++;
1638     break;
1639
1640     /* Word boundary assertions */
1641
1642     case OP_NOT_WORD_BOUNDARY:
1643     case OP_WORD_BOUNDARY:
1644       {
1645
1646       /* Find out if the previous and current characters are "word" characters.
1647       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1648       be "non-word" characters. Remember the earliest consulted character for
1649       partial matching. */
1650
1651 #ifdef SUPPORT_UTF8
1652       if (utf8)
1653         {
1654         if (eptr == md->start_subject) prev_is_word = FALSE; else
1655           {
1656           USPTR lastptr = eptr - 1;
1657           while((*lastptr & 0xc0) == 0x80) lastptr--;
1658           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1659           GETCHAR(c, lastptr);
1660           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1661           }
1662         if (eptr >= md->end_subject)
1663           {
1664           SCHECK_PARTIAL();
1665           cur_is_word = FALSE;
1666           }
1667         else
1668           {
1669           GETCHAR(c, eptr);
1670           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1671           }
1672         }
1673       else
1674 #endif
1675
1676       /* Not in UTF-8 mode */
1677
1678         {
1679         if (eptr == md->start_subject) prev_is_word = FALSE; else
1680           {
1681           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1682           prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1683           }
1684         if (eptr >= md->end_subject)
1685           {
1686           SCHECK_PARTIAL();
1687           cur_is_word = FALSE;
1688           }
1689         else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1690         }
1691
1692       /* Now see if the situation is what we want */
1693
1694       if ((*ecode++ == OP_WORD_BOUNDARY)?
1695            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1696         RRETURN(MATCH_NOMATCH);
1697       }
1698     break;
1699
1700     /* Match a single character type; inline for speed */
1701
1702     case OP_ANY:
1703     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1704     /* Fall through */
1705
1706     case OP_ALLANY:
1707     if (eptr++ >= md->end_subject)
1708       {
1709       SCHECK_PARTIAL();
1710       RRETURN(MATCH_NOMATCH);
1711       }
1712     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1713     ecode++;
1714     break;
1715
1716     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1717     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1718
1719     case OP_ANYBYTE:
1720     if (eptr++ >= md->end_subject)
1721       {
1722       SCHECK_PARTIAL();
1723       RRETURN(MATCH_NOMATCH);
1724       }
1725     ecode++;
1726     break;
1727
1728     case OP_NOT_DIGIT:
1729     if (eptr >= md->end_subject)
1730       {
1731       SCHECK_PARTIAL();
1732       RRETURN(MATCH_NOMATCH);
1733       }
1734     GETCHARINCTEST(c, eptr);
1735     if (
1736 #ifdef SUPPORT_UTF8
1737        c < 256 &&
1738 #endif
1739        (md->ctypes[c] & ctype_digit) != 0
1740        )
1741       RRETURN(MATCH_NOMATCH);
1742     ecode++;
1743     break;
1744
1745     case OP_DIGIT:
1746     if (eptr >= md->end_subject)
1747       {
1748       SCHECK_PARTIAL();
1749       RRETURN(MATCH_NOMATCH);
1750       }
1751     GETCHARINCTEST(c, eptr);
1752     if (
1753 #ifdef SUPPORT_UTF8
1754        c >= 256 ||
1755 #endif
1756        (md->ctypes[c] & ctype_digit) == 0
1757        )
1758       RRETURN(MATCH_NOMATCH);
1759     ecode++;
1760     break;
1761
1762     case OP_NOT_WHITESPACE:
1763     if (eptr >= md->end_subject)
1764       {
1765       SCHECK_PARTIAL();
1766       RRETURN(MATCH_NOMATCH);
1767       }
1768     GETCHARINCTEST(c, eptr);
1769     if (
1770 #ifdef SUPPORT_UTF8
1771        c < 256 &&
1772 #endif
1773        (md->ctypes[c] & ctype_space) != 0
1774        )
1775       RRETURN(MATCH_NOMATCH);
1776     ecode++;
1777     break;
1778
1779     case OP_WHITESPACE:
1780     if (eptr >= md->end_subject)
1781       {
1782       SCHECK_PARTIAL();
1783       RRETURN(MATCH_NOMATCH);
1784       }
1785     GETCHARINCTEST(c, eptr);
1786     if (
1787 #ifdef SUPPORT_UTF8
1788        c >= 256 ||
1789 #endif
1790        (md->ctypes[c] & ctype_space) == 0
1791        )
1792       RRETURN(MATCH_NOMATCH);
1793     ecode++;
1794     break;
1795
1796     case OP_NOT_WORDCHAR:
1797     if (eptr >= md->end_subject)
1798       {
1799       SCHECK_PARTIAL();
1800       RRETURN(MATCH_NOMATCH);
1801       }
1802     GETCHARINCTEST(c, eptr);
1803     if (
1804 #ifdef SUPPORT_UTF8
1805        c < 256 &&
1806 #endif
1807        (md->ctypes[c] & ctype_word) != 0
1808        )
1809       RRETURN(MATCH_NOMATCH);
1810     ecode++;
1811     break;
1812
1813     case OP_WORDCHAR:
1814     if (eptr >= md->end_subject)
1815       {
1816       SCHECK_PARTIAL();
1817       RRETURN(MATCH_NOMATCH);
1818       }
1819     GETCHARINCTEST(c, eptr);
1820     if (
1821 #ifdef SUPPORT_UTF8
1822        c >= 256 ||
1823 #endif
1824        (md->ctypes[c] & ctype_word) == 0
1825        )
1826       RRETURN(MATCH_NOMATCH);
1827     ecode++;
1828     break;
1829
1830     case OP_ANYNL:
1831     if (eptr >= md->end_subject)
1832       {
1833       SCHECK_PARTIAL();
1834       RRETURN(MATCH_NOMATCH);
1835       }
1836     GETCHARINCTEST(c, eptr);
1837     switch(c)
1838       {
1839       default: RRETURN(MATCH_NOMATCH);
1840       case 0x000d:
1841       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1842       break;
1843
1844       case 0x000a:
1845       break;
1846
1847       case 0x000b:
1848       case 0x000c:
1849       case 0x0085:
1850       case 0x2028:
1851       case 0x2029:
1852       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1853       break;
1854       }
1855     ecode++;
1856     break;
1857
1858     case OP_NOT_HSPACE:
1859     if (eptr >= md->end_subject)
1860       {
1861       SCHECK_PARTIAL();
1862       RRETURN(MATCH_NOMATCH);
1863       }
1864     GETCHARINCTEST(c, eptr);
1865     switch(c)
1866       {
1867       default: break;
1868       case 0x09:      /* HT */
1869       case 0x20:      /* SPACE */
1870       case 0xa0:      /* NBSP */
1871       case 0x1680:    /* OGHAM SPACE MARK */
1872       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1873       case 0x2000:    /* EN QUAD */
1874       case 0x2001:    /* EM QUAD */
1875       case 0x2002:    /* EN SPACE */
1876       case 0x2003:    /* EM SPACE */
1877       case 0x2004:    /* THREE-PER-EM SPACE */
1878       case 0x2005:    /* FOUR-PER-EM SPACE */
1879       case 0x2006:    /* SIX-PER-EM SPACE */
1880       case 0x2007:    /* FIGURE SPACE */
1881       case 0x2008:    /* PUNCTUATION SPACE */
1882       case 0x2009:    /* THIN SPACE */
1883       case 0x200A:    /* HAIR SPACE */
1884       case 0x202f:    /* NARROW NO-BREAK SPACE */
1885       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1886       case 0x3000:    /* IDEOGRAPHIC SPACE */
1887       RRETURN(MATCH_NOMATCH);
1888       }
1889     ecode++;
1890     break;
1891
1892     case OP_HSPACE:
1893     if (eptr >= md->end_subject)
1894       {
1895       SCHECK_PARTIAL();
1896       RRETURN(MATCH_NOMATCH);
1897       }
1898     GETCHARINCTEST(c, eptr);
1899     switch(c)
1900       {
1901       default: RRETURN(MATCH_NOMATCH);
1902       case 0x09:      /* HT */
1903       case 0x20:      /* SPACE */
1904       case 0xa0:      /* NBSP */
1905       case 0x1680:    /* OGHAM SPACE MARK */
1906       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1907       case 0x2000:    /* EN QUAD */
1908       case 0x2001:    /* EM QUAD */
1909       case 0x2002:    /* EN SPACE */
1910       case 0x2003:    /* EM SPACE */
1911       case 0x2004:    /* THREE-PER-EM SPACE */
1912       case 0x2005:    /* FOUR-PER-EM SPACE */
1913       case 0x2006:    /* SIX-PER-EM SPACE */
1914       case 0x2007:    /* FIGURE SPACE */
1915       case 0x2008:    /* PUNCTUATION SPACE */
1916       case 0x2009:    /* THIN SPACE */
1917       case 0x200A:    /* HAIR SPACE */
1918       case 0x202f:    /* NARROW NO-BREAK SPACE */
1919       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1920       case 0x3000:    /* IDEOGRAPHIC SPACE */
1921       break;
1922       }
1923     ecode++;
1924     break;
1925
1926     case OP_NOT_VSPACE:
1927     if (eptr >= md->end_subject)
1928       {
1929       SCHECK_PARTIAL();
1930       RRETURN(MATCH_NOMATCH);
1931       }
1932     GETCHARINCTEST(c, eptr);
1933     switch(c)
1934       {
1935       default: break;
1936       case 0x0a:      /* LF */
1937       case 0x0b:      /* VT */
1938       case 0x0c:      /* FF */
1939       case 0x0d:      /* CR */
1940       case 0x85:      /* NEL */
1941       case 0x2028:    /* LINE SEPARATOR */
1942       case 0x2029:    /* PARAGRAPH SEPARATOR */
1943       RRETURN(MATCH_NOMATCH);
1944       }
1945     ecode++;
1946     break;
1947
1948     case OP_VSPACE:
1949     if (eptr >= md->end_subject)
1950       {
1951       SCHECK_PARTIAL();
1952       RRETURN(MATCH_NOMATCH);
1953       }
1954     GETCHARINCTEST(c, eptr);
1955     switch(c)
1956       {
1957       default: RRETURN(MATCH_NOMATCH);
1958       case 0x0a:      /* LF */
1959       case 0x0b:      /* VT */
1960       case 0x0c:      /* FF */
1961       case 0x0d:      /* CR */
1962       case 0x85:      /* NEL */
1963       case 0x2028:    /* LINE SEPARATOR */
1964       case 0x2029:    /* PARAGRAPH SEPARATOR */
1965       break;
1966       }
1967     ecode++;
1968     break;
1969
1970 #ifdef SUPPORT_UCP
1971     /* Check the next character by Unicode property. We will get here only
1972     if the support is in the binary; otherwise a compile-time error occurs. */
1973
1974     case OP_PROP:
1975     case OP_NOTPROP:
1976     if (eptr >= md->end_subject)
1977       {
1978       SCHECK_PARTIAL();
1979       RRETURN(MATCH_NOMATCH);
1980       }
1981     GETCHARINCTEST(c, eptr);
1982       {
1983       int chartype = UCD_CHARTYPE(c);
1984       switch(ecode[1])
1985         {
1986         case PT_ANY:
1987         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1988         break;
1989
1990         case PT_LAMP:
1991         if ((chartype == ucp_Lu ||
1992              chartype == ucp_Ll ||
1993              chartype == ucp_Lt) == (op == OP_NOTPROP))
1994           RRETURN(MATCH_NOMATCH);
1995          break;
1996
1997         case PT_GC:
1998         if ((ecode[2] != _pcre_ucp_gentype[chartype]) == (op == OP_PROP))
1999           RRETURN(MATCH_NOMATCH);
2000         break;
2001
2002         case PT_PC:
2003         if ((ecode[2] != chartype) == (op == OP_PROP))
2004           RRETURN(MATCH_NOMATCH);
2005         break;
2006
2007         case PT_SC:
2008         if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
2009           RRETURN(MATCH_NOMATCH);
2010         break;
2011
2012         default:
2013         RRETURN(PCRE_ERROR_INTERNAL);
2014         }
2015
2016       ecode += 3;
2017       }
2018     break;
2019
2020     /* Match an extended Unicode sequence. We will get here only if the support
2021     is in the binary; otherwise a compile-time error occurs. */
2022
2023     case OP_EXTUNI:
2024     if (eptr >= md->end_subject)
2025       {
2026       SCHECK_PARTIAL();
2027       RRETURN(MATCH_NOMATCH);
2028       }
2029     GETCHARINCTEST(c, eptr);
2030       {
2031       int category = UCD_CATEGORY(c);
2032       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
2033       while (eptr < md->end_subject)
2034         {
2035         int len = 1;
2036         if (!utf8) c = *eptr; else
2037           {
2038           GETCHARLEN(c, eptr, len);
2039           }
2040         category = UCD_CATEGORY(c);
2041         if (category != ucp_M) break;
2042         eptr += len;
2043         }
2044       }
2045     ecode++;
2046     break;
2047 #endif
2048
2049
2050     /* Match a back reference, possibly repeatedly. Look past the end of the
2051     item to see if there is repeat information following. The code is similar
2052     to that for character classes, but repeated for efficiency. Then obey
2053     similar code to character type repeats - written out again for speed.
2054     However, if the referenced string is the empty string, always treat
2055     it as matched, any number of times (otherwise there could be infinite
2056     loops). */
2057
2058     case OP_REF:
2059       {
2060       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2061       ecode += 3;
2062
2063       /* If the reference is unset, there are two possibilities:
2064
2065       (a) In the default, Perl-compatible state, set the length to be longer
2066       than the amount of subject left; this ensures that every attempt at a
2067       match fails. We can't just fail here, because of the possibility of
2068       quantifiers with zero minima.
2069
2070       (b) If the JavaScript compatibility flag is set, set the length to zero
2071       so that the back reference matches an empty string.
2072
2073       Otherwise, set the length to the length of what was matched by the
2074       referenced subpattern. */
2075
2076       if (offset >= offset_top || md->offset_vector[offset] < 0)
2077         length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2078       else
2079         length = md->offset_vector[offset+1] - md->offset_vector[offset];
2080
2081       /* Set up for repetition, or handle the non-repeated case */
2082
2083       switch (*ecode)
2084         {
2085         case OP_CRSTAR:
2086         case OP_CRMINSTAR:
2087         case OP_CRPLUS:
2088         case OP_CRMINPLUS:
2089         case OP_CRQUERY:
2090         case OP_CRMINQUERY:
2091         c = *ecode++ - OP_CRSTAR;
2092         minimize = (c & 1) != 0;
2093         min = rep_min[c];                 /* Pick up values from tables; */
2094         max = rep_max[c];                 /* zero for max => infinity */
2095         if (max == 0) max = INT_MAX;
2096         break;
2097
2098         case OP_CRRANGE:
2099         case OP_CRMINRANGE:
2100         minimize = (*ecode == OP_CRMINRANGE);
2101         min = GET2(ecode, 1);
2102         max = GET2(ecode, 3);
2103         if (max == 0) max = INT_MAX;
2104         ecode += 5;
2105         break;
2106
2107         default:               /* No repeat follows */
2108         if (!match_ref(offset, eptr, length, md, ims))
2109           {
2110           CHECK_PARTIAL();
2111           RRETURN(MATCH_NOMATCH);
2112           }
2113         eptr += length;
2114         continue;              /* With the main loop */
2115         }
2116
2117       /* If the length of the reference is zero, just continue with the
2118       main loop. */
2119
2120       if (length == 0) continue;
2121
2122       /* First, ensure the minimum number of matches are present. We get back
2123       the length of the reference string explicitly rather than passing the
2124       address of eptr, so that eptr can be a register variable. */
2125
2126       for (i = 1; i <= min; i++)
2127         {
2128         if (!match_ref(offset, eptr, length, md, ims))
2129           {
2130           CHECK_PARTIAL();
2131           RRETURN(MATCH_NOMATCH);
2132           }
2133         eptr += length;
2134         }
2135
2136       /* If min = max, continue at the same level without recursion.
2137       They are not both allowed to be zero. */
2138
2139       if (min == max) continue;
2140
2141       /* If minimizing, keep trying and advancing the pointer */
2142
2143       if (minimize)
2144         {
2145         for (fi = min;; fi++)
2146           {
2147           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2148           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2149           if (fi >= max) RRETURN(MATCH_NOMATCH);
2150           if (!match_ref(offset, eptr, length, md, ims))
2151             {
2152             CHECK_PARTIAL();
2153             RRETURN(MATCH_NOMATCH);
2154             }
2155           eptr += length;
2156           }
2157         /* Control never gets here */
2158         }
2159
2160       /* If maximizing, find the longest string and work backwards */
2161
2162       else
2163         {
2164         pp = eptr;
2165         for (i = min; i < max; i++)
2166           {
2167           if (!match_ref(offset, eptr, length, md, ims))
2168             {
2169             CHECK_PARTIAL();
2170             break;
2171             }
2172           eptr += length;
2173           }
2174         while (eptr >= pp)
2175           {
2176           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2177           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2178           eptr -= length;
2179           }
2180         RRETURN(MATCH_NOMATCH);
2181         }
2182       }
2183     /* Control never gets here */
2184
2185     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2186     used when all the characters in the class have values in the range 0-255,
2187     and either the matching is caseful, or the characters are in the range
2188     0-127 when UTF-8 processing is enabled. The only difference between
2189     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2190     encountered.
2191
2192     First, look past the end of the item to see if there is repeat information
2193     following. Then obey similar code to character type repeats - written out
2194     again for speed. */
2195
2196     case OP_NCLASS:
2197     case OP_CLASS:
2198       {
2199       data = ecode + 1;                /* Save for matching */
2200       ecode += 33;                     /* Advance past the item */
2201
2202       switch (*ecode)
2203         {
2204         case OP_CRSTAR:
2205         case OP_CRMINSTAR:
2206         case OP_CRPLUS:
2207         case OP_CRMINPLUS:
2208         case OP_CRQUERY:
2209         case OP_CRMINQUERY:
2210         c = *ecode++ - OP_CRSTAR;
2211         minimize = (c & 1) != 0;
2212         min = rep_min[c];                 /* Pick up values from tables; */
2213         max = rep_max[c];                 /* zero for max => infinity */
2214         if (max == 0) max = INT_MAX;
2215         break;
2216
2217         case OP_CRRANGE:
2218         case OP_CRMINRANGE:
2219         minimize = (*ecode == OP_CRMINRANGE);
2220         min = GET2(ecode, 1);
2221         max = GET2(ecode, 3);
2222         if (max == 0) max = INT_MAX;
2223         ecode += 5;
2224         break;
2225
2226         default:               /* No repeat follows */
2227         min = max = 1;
2228         break;
2229         }
2230
2231       /* First, ensure the minimum number of matches are present. */
2232
2233 #ifdef SUPPORT_UTF8
2234       /* UTF-8 mode */
2235       if (utf8)
2236         {
2237         for (i = 1; i <= min; i++)
2238           {
2239           if (eptr >= md->end_subject)
2240             {
2241             SCHECK_PARTIAL();
2242             RRETURN(MATCH_NOMATCH);
2243             }
2244           GETCHARINC(c, eptr);
2245           if (c > 255)
2246             {
2247             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2248             }
2249           else
2250             {
2251             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2252             }
2253           }
2254         }
2255       else
2256 #endif
2257       /* Not UTF-8 mode */
2258         {
2259         for (i = 1; i <= min; i++)
2260           {
2261           if (eptr >= md->end_subject)
2262             {
2263             SCHECK_PARTIAL();
2264             RRETURN(MATCH_NOMATCH);
2265             }
2266           c = *eptr++;
2267           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2268           }
2269         }
2270
2271       /* If max == min we can continue with the main loop without the
2272       need to recurse. */
2273
2274       if (min == max) continue;
2275
2276       /* If minimizing, keep testing the rest of the expression and advancing
2277       the pointer while it matches the class. */
2278
2279       if (minimize)
2280         {
2281 #ifdef SUPPORT_UTF8
2282         /* UTF-8 mode */
2283         if (utf8)
2284           {
2285           for (fi = min;; fi++)
2286             {
2287             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2288             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2289             if (fi >= max) RRETURN(MATCH_NOMATCH);
2290             if (eptr >= md->end_subject)
2291               {
2292               SCHECK_PARTIAL();
2293               RRETURN(MATCH_NOMATCH);
2294               }
2295             GETCHARINC(c, eptr);
2296             if (c > 255)
2297               {
2298               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2299               }
2300             else
2301               {
2302               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2303               }
2304             }
2305           }
2306         else
2307 #endif
2308         /* Not UTF-8 mode */
2309           {
2310           for (fi = min;; fi++)
2311             {
2312             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2313             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2314             if (fi >= max) RRETURN(MATCH_NOMATCH);
2315             if (eptr >= md->end_subject)
2316               {
2317               SCHECK_PARTIAL();
2318               RRETURN(MATCH_NOMATCH);
2319               }
2320             c = *eptr++;
2321             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2322             }
2323           }
2324         /* Control never gets here */
2325         }
2326
2327       /* If maximizing, find the longest possible run, then work backwards. */
2328
2329       else
2330         {
2331         pp = eptr;
2332
2333 #ifdef SUPPORT_UTF8
2334         /* UTF-8 mode */
2335         if (utf8)
2336           {
2337           for (i = min; i < max; i++)
2338             {
2339             int len = 1;
2340             if (eptr >= md->end_subject)
2341               {
2342               SCHECK_PARTIAL();
2343               break;
2344               }
2345             GETCHARLEN(c, eptr, len);
2346             if (c > 255)
2347               {
2348               if (op == OP_CLASS) break;
2349               }
2350             else
2351               {
2352               if ((data[c/8] & (1 << (c&7))) == 0) break;
2353               }
2354             eptr += len;
2355             }
2356           for (;;)
2357             {
2358             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2359             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2360             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2361             BACKCHAR(eptr);
2362             }
2363           }
2364         else
2365 #endif
2366           /* Not UTF-8 mode */
2367           {
2368           for (i = min; i < max; i++)
2369             {
2370             if (eptr >= md->end_subject)
2371               {
2372               SCHECK_PARTIAL();
2373               break;
2374               }
2375             c = *eptr;
2376             if ((data[c/8] & (1 << (c&7))) == 0) break;
2377             eptr++;
2378             }
2379           while (eptr >= pp)
2380             {
2381             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2382             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2383             eptr--;
2384             }
2385           }
2386
2387         RRETURN(MATCH_NOMATCH);
2388         }
2389       }
2390     /* Control never gets here */
2391
2392
2393     /* Match an extended character class. This opcode is encountered only
2394     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2395     mode, because Unicode properties are supported in non-UTF-8 mode. */
2396
2397 #ifdef SUPPORT_UTF8
2398     case OP_XCLASS:
2399       {
2400       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
2401       ecode += GET(ecode, 1);                      /* Advance past the item */
2402
2403       switch (*ecode)
2404         {
2405         case OP_CRSTAR:
2406         case OP_CRMINSTAR:
2407         case OP_CRPLUS:
2408         case OP_CRMINPLUS:
2409         case OP_CRQUERY:
2410         case OP_CRMINQUERY:
2411         c = *ecode++ - OP_CRSTAR;
2412         minimize = (c & 1) != 0;
2413         min = rep_min[c];                 /* Pick up values from tables; */
2414         max = rep_max[c];                 /* zero for max => infinity */
2415         if (max == 0) max = INT_MAX;
2416         break;
2417
2418         case OP_CRRANGE:
2419         case OP_CRMINRANGE:
2420         minimize = (*ecode == OP_CRMINRANGE);
2421         min = GET2(ecode, 1);
2422         max = GET2(ecode, 3);
2423         if (max == 0) max = INT_MAX;
2424         ecode += 5;
2425         break;
2426
2427         default:               /* No repeat follows */
2428         min = max = 1;
2429         break;
2430         }
2431
2432       /* First, ensure the minimum number of matches are present. */
2433
2434       for (i = 1; i <= min; i++)
2435         {
2436         if (eptr >= md->end_subject)
2437           {
2438           SCHECK_PARTIAL();
2439           RRETURN(MATCH_NOMATCH);
2440           }
2441         GETCHARINCTEST(c, eptr);
2442         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2443         }
2444
2445       /* If max == min we can continue with the main loop without the
2446       need to recurse. */
2447
2448       if (min == max) continue;
2449
2450       /* If minimizing, keep testing the rest of the expression and advancing
2451       the pointer while it matches the class. */
2452
2453       if (minimize)
2454         {
2455         for (fi = min;; fi++)
2456           {
2457           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2458           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2459           if (fi >= max) RRETURN(MATCH_NOMATCH);
2460           if (eptr >= md->end_subject)
2461             {
2462             SCHECK_PARTIAL();
2463             RRETURN(MATCH_NOMATCH);
2464             }
2465           GETCHARINCTEST(c, eptr);
2466           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2467           }
2468         /* Control never gets here */
2469         }
2470
2471       /* If maximizing, find the longest possible run, then work backwards. */
2472
2473       else
2474         {
2475         pp = eptr;
2476         for (i = min; i < max; i++)
2477           {
2478           int len = 1;
2479           if (eptr >= md->end_subject)
2480             {
2481             SCHECK_PARTIAL();
2482             break;
2483             }
2484           GETCHARLENTEST(c, eptr, len);
2485           if (!_pcre_xclass(c, data)) break;
2486           eptr += len;
2487           }
2488         for(;;)
2489           {
2490           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2491           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2492           if (eptr-- == pp) break;        /* Stop if tried at original pos */
2493           if (utf8) BACKCHAR(eptr);
2494           }
2495         RRETURN(MATCH_NOMATCH);
2496         }
2497
2498       /* Control never gets here */
2499       }
2500 #endif    /* End of XCLASS */
2501
2502     /* Match a single character, casefully */
2503
2504     case OP_CHAR:
2505 #ifdef SUPPORT_UTF8
2506     if (utf8)
2507       {
2508       length = 1;
2509       ecode++;
2510       GETCHARLEN(fc, ecode, length);
2511       if (length > md->end_subject - eptr)
2512         {
2513         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2514         RRETURN(MATCH_NOMATCH);
2515         }
2516       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2517       }
2518     else
2519 #endif
2520
2521     /* Non-UTF-8 mode */
2522       {
2523       if (md->end_subject - eptr < 1)
2524         {
2525         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2526         RRETURN(MATCH_NOMATCH);
2527         }
2528       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2529       ecode += 2;
2530       }
2531     break;
2532
2533     /* Match a single character, caselessly */
2534
2535     case OP_CHARNC:
2536 #ifdef SUPPORT_UTF8
2537     if (utf8)
2538       {
2539       length = 1;
2540       ecode++;
2541       GETCHARLEN(fc, ecode, length);
2542
2543       if (length > md->end_subject - eptr)
2544         {
2545         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2546         RRETURN(MATCH_NOMATCH);
2547         }
2548
2549       /* If the pattern character's value is < 128, we have only one byte, and
2550       can use the fast lookup table. */
2551
2552       if (fc < 128)
2553         {
2554         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2555         }
2556
2557       /* Otherwise we must pick up the subject character */
2558
2559       else
2560         {
2561         unsigned int dc;
2562         GETCHARINC(dc, eptr);
2563         ecode += length;
2564
2565         /* If we have Unicode property support, we can use it to test the other
2566         case of the character, if there is one. */
2567
2568         if (fc != dc)
2569           {
2570 #ifdef SUPPORT_UCP
2571           if (dc != UCD_OTHERCASE(fc))
2572 #endif
2573             RRETURN(MATCH_NOMATCH);
2574           }
2575         }
2576       }
2577     else
2578 #endif   /* SUPPORT_UTF8 */
2579
2580     /* Non-UTF-8 mode */
2581       {
2582       if (md->end_subject - eptr < 1)
2583         {
2584         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2585         RRETURN(MATCH_NOMATCH);
2586         }
2587       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2588       ecode += 2;
2589       }
2590     break;
2591
2592     /* Match a single character repeatedly. */
2593
2594     case OP_EXACT:
2595     min = max = GET2(ecode, 1);
2596     ecode += 3;
2597     goto REPEATCHAR;
2598
2599     case OP_POSUPTO:
2600     possessive = TRUE;
2601     /* Fall through */
2602
2603     case OP_UPTO:
2604     case OP_MINUPTO:
2605     min = 0;
2606     max = GET2(ecode, 1);
2607     minimize = *ecode == OP_MINUPTO;
2608     ecode += 3;
2609     goto REPEATCHAR;
2610
2611     case OP_POSSTAR:
2612     possessive = TRUE;
2613     min = 0;
2614     max = INT_MAX;
2615     ecode++;
2616     goto REPEATCHAR;
2617
2618     case OP_POSPLUS:
2619     possessive = TRUE;
2620     min = 1;
2621     max = INT_MAX;
2622     ecode++;
2623     goto REPEATCHAR;
2624
2625     case OP_POSQUERY:
2626     possessive = TRUE;
2627     min = 0;
2628     max = 1;
2629     ecode++;
2630     goto REPEATCHAR;
2631
2632     case OP_STAR:
2633     case OP_MINSTAR:
2634     case OP_PLUS:
2635     case OP_MINPLUS:
2636     case OP_QUERY:
2637     case OP_MINQUERY:
2638     c = *ecode++ - OP_STAR;
2639     minimize = (c & 1) != 0;
2640
2641     min = rep_min[c];                 /* Pick up values from tables; */
2642     max = rep_max[c];                 /* zero for max => infinity */
2643     if (max == 0) max = INT_MAX;
2644
2645     /* Common code for all repeated single-character matches. */
2646
2647     REPEATCHAR:
2648 #ifdef SUPPORT_UTF8
2649     if (utf8)
2650       {
2651       length = 1;
2652       charptr = ecode;
2653       GETCHARLEN(fc, ecode, length);
2654       ecode += length;
2655
2656       /* Handle multibyte character matching specially here. There is
2657       support for caseless matching if UCP support is present. */
2658
2659       if (length > 1)
2660         {
2661 #ifdef SUPPORT_UCP
2662         unsigned int othercase;
2663         if ((ims & PCRE_CASELESS) != 0 &&
2664             (othercase = UCD_OTHERCASE(fc)) != fc)
2665           oclength = _pcre_ord2utf8(othercase, occhars);
2666         else oclength = 0;
2667 #endif  /* SUPPORT_UCP */
2668
2669         for (i = 1; i <= min; i++)
2670           {
2671           if (eptr <= md->end_subject - length &&
2672             memcmp(eptr, charptr, length) == 0) eptr += length;
2673 #ifdef SUPPORT_UCP
2674           else if (oclength > 0 &&
2675                    eptr <= md->end_subject - oclength &&
2676                    memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2677 #endif  /* SUPPORT_UCP */
2678           else
2679             {
2680             CHECK_PARTIAL();
2681             RRETURN(MATCH_NOMATCH);
2682             }
2683           }
2684
2685         if (min == max) continue;
2686
2687         if (minimize)
2688           {
2689           for (fi = min;; fi++)
2690             {
2691             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2692             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2693             if (fi >= max) RRETURN(MATCH_NOMATCH);
2694             if (eptr <= md->end_subject - length &&
2695               memcmp(eptr, charptr, length) == 0) eptr += length;
2696 #ifdef SUPPORT_UCP
2697             else if (oclength > 0 &&
2698                      eptr <= md->end_subject - oclength &&
2699                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2700 #endif  /* SUPPORT_UCP */
2701             else
2702               {
2703               CHECK_PARTIAL();
2704               RRETURN(MATCH_NOMATCH);
2705               }
2706             }
2707           /* Control never gets here */
2708           }
2709
2710         else  /* Maximize */
2711           {
2712           pp = eptr;
2713           for (i = min; i < max; i++)
2714             {
2715             if (eptr <= md->end_subject - length &&
2716                 memcmp(eptr, charptr, length) == 0) eptr += length;
2717 #ifdef SUPPORT_UCP
2718             else if (oclength > 0 &&
2719                      eptr <= md->end_subject - oclength &&
2720                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2721 #endif  /* SUPPORT_UCP */
2722             else
2723               {
2724               CHECK_PARTIAL();
2725               break;
2726               }
2727             }
2728
2729           if (possessive) continue;
2730
2731           for(;;)
2732             {
2733             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2734             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2735             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2736 #ifdef SUPPORT_UCP
2737             eptr--;
2738             BACKCHAR(eptr);
2739 #else   /* without SUPPORT_UCP */
2740             eptr -= length;
2741 #endif  /* SUPPORT_UCP */
2742             }
2743           }
2744         /* Control never gets here */
2745         }
2746
2747       /* If the length of a UTF-8 character is 1, we fall through here, and
2748       obey the code as for non-UTF-8 characters below, though in this case the
2749       value of fc will always be < 128. */
2750       }
2751     else
2752 #endif  /* SUPPORT_UTF8 */
2753
2754     /* When not in UTF-8 mode, load a single-byte character. */
2755
2756     fc = *ecode++;
2757
2758     /* The value of fc at this point is always less than 256, though we may or
2759     may not be in UTF-8 mode. The code is duplicated for the caseless and
2760     caseful cases, for speed, since matching characters is likely to be quite
2761     common. First, ensure the minimum number of matches are present. If min =
2762     max, continue at the same level without recursing. Otherwise, if
2763     minimizing, keep trying the rest of the expression and advancing one
2764     matching character if failing, up to the maximum. Alternatively, if
2765     maximizing, find the maximum number of characters and work backwards. */
2766
2767     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2768       max, eptr));
2769
2770     if ((ims & PCRE_CASELESS) != 0)
2771       {
2772       fc = md->lcc[fc];
2773       for (i = 1; i <= min; i++)
2774         {
2775         if (eptr >= md->end_subject)
2776           {
2777           SCHECK_PARTIAL();
2778           RRETURN(MATCH_NOMATCH);
2779           }
2780         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2781         }
2782       if (min == max) continue;
2783       if (minimize)
2784         {
2785         for (fi = min;; fi++)
2786           {
2787           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2788           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2789           if (fi >= max) RRETURN(MATCH_NOMATCH);
2790           if (eptr >= md->end_subject)
2791             {
2792             SCHECK_PARTIAL();
2793             RRETURN(MATCH_NOMATCH);
2794             }
2795           if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2796           }
2797         /* Control never gets here */
2798         }
2799       else  /* Maximize */
2800         {
2801         pp = eptr;
2802         for (i = min; i < max; i++)
2803           {
2804           if (eptr >= md->end_subject)
2805             {
2806             SCHECK_PARTIAL();
2807             break;
2808             }
2809           if (fc != md->lcc[*eptr]) break;
2810           eptr++;
2811           }
2812
2813         if (possessive) continue;
2814
2815         while (eptr >= pp)
2816           {
2817           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2818           eptr--;
2819           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2820           }
2821         RRETURN(MATCH_NOMATCH);
2822         }
2823       /* Control never gets here */
2824       }
2825
2826     /* Caseful comparisons (single-byte characters only at this point) */
2827
2828     else
2829       {
2830       for (i = 1; i <= min; i++)
2831         {
2832         if (eptr >= md->end_subject)
2833           {
2834           SCHECK_PARTIAL();
2835           RRETURN(MATCH_NOMATCH);
2836           }
2837         if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2838         }
2839
2840       if (min == max) continue;
2841
2842       if (minimize)
2843         {
2844         for (fi = min;; fi++)
2845           {
2846           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2847           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2848           if (fi >= max) RRETURN(MATCH_NOMATCH);
2849           if (eptr >= md->end_subject)
2850             {
2851             SCHECK_PARTIAL();
2852             RRETURN(MATCH_NOMATCH);
2853             }
2854           if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2855           }
2856         /* Control never gets here */
2857         }
2858       else  /* Maximize */
2859         {
2860         pp = eptr;
2861         for (i = min; i < max; i++)
2862           {
2863           if (eptr >= md->end_subject)
2864             {
2865             SCHECK_PARTIAL();
2866             break;
2867             }
2868           if (fc != *eptr) break;
2869           eptr++;
2870           }
2871         if (possessive) continue;
2872
2873         while (eptr >= pp)
2874           {
2875           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2876           eptr--;
2877           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2878           }
2879         RRETURN(MATCH_NOMATCH);
2880         }
2881       }
2882     /* Control never gets here */
2883
2884     /* Match a negated single one-byte character. The character we are
2885     checking can be multibyte. */
2886
2887     case OP_NOT:
2888     if (eptr >= md->end_subject)
2889       {
2890       SCHECK_PARTIAL();
2891       RRETURN(MATCH_NOMATCH);
2892       }
2893     ecode++;
2894     GETCHARINCTEST(c, eptr);
2895     if ((ims & PCRE_CASELESS) != 0)
2896       {
2897 #ifdef SUPPORT_UTF8
2898       if (c < 256)
2899 #endif
2900       c = md->lcc[c];
2901       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2902       }
2903     else
2904       {
2905       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2906       }
2907     break;
2908
2909     /* Match a negated single one-byte character repeatedly. This is almost a
2910     repeat of the code for a repeated single character, but I haven't found a
2911     nice way of commoning these up that doesn't require a test of the
2912     positive/negative option for each character match. Maybe that wouldn't add
2913     very much to the time taken, but character matching *is* what this is all
2914     about... */
2915
2916     case OP_NOTEXACT:
2917     min = max = GET2(ecode, 1);
2918     ecode += 3;
2919     goto REPEATNOTCHAR;
2920
2921     case OP_NOTUPTO:
2922     case OP_NOTMINUPTO:
2923     min = 0;
2924     max = GET2(ecode, 1);
2925     minimize = *ecode == OP_NOTMINUPTO;
2926     ecode += 3;
2927     goto REPEATNOTCHAR;
2928
2929     case OP_NOTPOSSTAR:
2930     possessive = TRUE;
2931     min = 0;
2932     max = INT_MAX;
2933     ecode++;
2934     goto REPEATNOTCHAR;
2935
2936     case OP_NOTPOSPLUS:
2937     possessive = TRUE;
2938     min = 1;
2939     max = INT_MAX;
2940     ecode++;
2941     goto REPEATNOTCHAR;
2942
2943     case OP_NOTPOSQUERY:
2944     possessive = TRUE;
2945     min = 0;
2946     max = 1;
2947     ecode++;
2948     goto REPEATNOTCHAR;
2949
2950     case OP_NOTPOSUPTO:
2951     possessive = TRUE;
2952     min = 0;
2953     max = GET2(ecode, 1);
2954     ecode += 3;
2955     goto REPEATNOTCHAR;
2956
2957     case OP_NOTSTAR:
2958     case OP_NOTMINSTAR:
2959     case OP_NOTPLUS:
2960     case OP_NOTMINPLUS:
2961     case OP_NOTQUERY:
2962     case OP_NOTMINQUERY:
2963     c = *ecode++ - OP_NOTSTAR;
2964     minimize = (c & 1) != 0;
2965     min = rep_min[c];                 /* Pick up values from tables; */
2966     max = rep_max[c];                 /* zero for max => infinity */
2967     if (max == 0) max = INT_MAX;
2968
2969     /* Common code for all repeated single-byte matches. */
2970
2971     REPEATNOTCHAR:
2972     fc = *ecode++;
2973
2974     /* The code is duplicated for the caseless and caseful cases, for speed,
2975     since matching characters is likely to be quite common. First, ensure the
2976     minimum number of matches are present. If min = max, continue at the same
2977     level without recursing. Otherwise, if minimizing, keep trying the rest of
2978     the expression and advancing one matching character if failing, up to the
2979     maximum. Alternatively, if maximizing, find the maximum number of
2980     characters and work backwards. */
2981
2982     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2983       max, eptr));
2984
2985     if ((ims & PCRE_CASELESS) != 0)
2986       {
2987       fc = md->lcc[fc];
2988
2989 #ifdef SUPPORT_UTF8
2990       /* UTF-8 mode */
2991       if (utf8)
2992         {
2993         register unsigned int d;
2994         for (i = 1; i <= min; i++)
2995           {
2996           if (eptr >= md->end_subject)
2997             {
2998             SCHECK_PARTIAL();
2999             RRETURN(MATCH_NOMATCH);
3000             }
3001           GETCHARINC(d, eptr);
3002           if (d < 256) d = md->lcc[d];
3003           if (fc == d) RRETURN(MATCH_NOMATCH);
3004           }
3005         }
3006       else
3007 #endif
3008
3009       /* Not UTF-8 mode */
3010         {
3011         for (i = 1; i <= min; i++)
3012           {
3013           if (eptr >= md->end_subject)
3014             {
3015             SCHECK_PARTIAL();
3016             RRETURN(MATCH_NOMATCH);
3017             }
3018           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3019           }
3020         }
3021
3022       if (min == max) continue;
3023
3024       if (minimize)
3025         {
3026 #ifdef SUPPORT_UTF8
3027         /* UTF-8 mode */
3028         if (utf8)
3029           {
3030           register unsigned int d;
3031           for (fi = min;; fi++)
3032             {
3033             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3034             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3035             if (fi >= max) RRETURN(MATCH_NOMATCH);
3036             if (eptr >= md->end_subject)
3037               {
3038               SCHECK_PARTIAL();
3039               RRETURN(MATCH_NOMATCH);
3040               }
3041             GETCHARINC(d, eptr);
3042             if (d < 256) d = md->lcc[d];
3043             if (fc == d) RRETURN(MATCH_NOMATCH);
3044             }
3045           }
3046         else
3047 #endif
3048         /* Not UTF-8 mode */
3049           {
3050           for (fi = min;; fi++)
3051             {
3052             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3053             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3054             if (fi >= max) RRETURN(MATCH_NOMATCH);
3055             if (eptr >= md->end_subject)
3056               {
3057               SCHECK_PARTIAL();
3058               RRETURN(MATCH_NOMATCH);
3059               }
3060             if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3061             }
3062           }
3063         /* Control never gets here */
3064         }
3065
3066       /* Maximize case */
3067
3068       else
3069         {
3070         pp = eptr;
3071
3072 #ifdef SUPPORT_UTF8
3073         /* UTF-8 mode */
3074         if (utf8)
3075           {
3076           register unsigned int d;
3077           for (i = min; i < max; i++)
3078             {
3079             int len = 1;
3080             if (eptr >= md->end_subject)
3081               {
3082               SCHECK_PARTIAL();
3083               break;
3084               }
3085             GETCHARLEN(d, eptr, len);
3086             if (d < 256) d = md->lcc[d];
3087             if (fc == d) break;
3088             eptr += len;
3089             }
3090         if (possessive) continue;
3091         for(;;)
3092             {
3093             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3094             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3095             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3096             BACKCHAR(eptr);
3097             }
3098           }
3099         else
3100 #endif
3101         /* Not UTF-8 mode */
3102           {
3103           for (i = min; i < max; i++)
3104             {
3105             if (eptr >= md->end_subject)
3106               {
3107               SCHECK_PARTIAL();
3108               break;
3109               }
3110             if (fc == md->lcc[*eptr]) break;
3111             eptr++;
3112             }
3113           if (possessive) continue;
3114           while (eptr >= pp)
3115             {
3116             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3117             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3118             eptr--;
3119             }
3120           }
3121
3122         RRETURN(MATCH_NOMATCH);
3123         }
3124       /* Control never gets here */
3125       }
3126
3127     /* Caseful comparisons */
3128
3129     else
3130       {
3131 #ifdef SUPPORT_UTF8
3132       /* UTF-8 mode */
3133       if (utf8)
3134         {
3135         register unsigned int d;
3136         for (i = 1; i <= min; i++)
3137           {
3138           if (eptr >= md->end_subject)
3139             {
3140             SCHECK_PARTIAL();
3141             RRETURN(MATCH_NOMATCH);
3142             }
3143           GETCHARINC(d, eptr);
3144           if (fc == d) RRETURN(MATCH_NOMATCH);
3145           }
3146         }
3147       else
3148 #endif
3149       /* Not UTF-8 mode */
3150         {
3151         for (i = 1; i <= min; i++)
3152           {
3153           if (eptr >= md->end_subject)
3154             {
3155             SCHECK_PARTIAL();
3156             RRETURN(MATCH_NOMATCH);
3157             }
3158           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3159           }
3160         }
3161
3162       if (min == max) continue;
3163
3164       if (minimize)
3165         {
3166 #ifdef SUPPORT_UTF8
3167         /* UTF-8 mode */
3168         if (utf8)
3169           {
3170           register unsigned int d;
3171           for (fi = min;; fi++)
3172             {
3173             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3174             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3175             if (fi >= max) RRETURN(MATCH_NOMATCH);
3176             if (eptr >= md->end_subject)
3177               {
3178               SCHECK_PARTIAL();
3179               RRETURN(MATCH_NOMATCH);
3180               }
3181             GETCHARINC(d, eptr);
3182             if (fc == d) RRETURN(MATCH_NOMATCH);
3183             }
3184           }
3185         else
3186 #endif
3187         /* Not UTF-8 mode */
3188           {
3189           for (fi = min;; fi++)
3190             {
3191             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3192             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3193             if (fi >= max) RRETURN(MATCH_NOMATCH);
3194             if (eptr >= md->end_subject)
3195               {
3196               SCHECK_PARTIAL();
3197               RRETURN(MATCH_NOMATCH);
3198               }
3199             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3200             }
3201           }
3202         /* Control never gets here */
3203         }
3204
3205       /* Maximize case */
3206
3207       else
3208         {
3209         pp = eptr;
3210
3211 #ifdef SUPPORT_UTF8
3212         /* UTF-8 mode */
3213         if (utf8)
3214           {
3215           register unsigned int d;
3216           for (i = min; i < max; i++)
3217             {
3218             int len = 1;
3219             if (eptr >= md->end_subject)
3220               {
3221               SCHECK_PARTIAL();
3222               break;
3223               }
3224             GETCHARLEN(d, eptr, len);
3225             if (fc == d) break;
3226             eptr += len;
3227             }
3228           if (possessive) continue;
3229           for(;;)
3230             {
3231             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3232             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3233             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3234             BACKCHAR(eptr);
3235             }
3236           }
3237         else
3238 #endif
3239         /* Not UTF-8 mode */
3240           {
3241           for (i = min; i < max; i++)
3242             {
3243             if (eptr >= md->end_subject)
3244               {
3245               SCHECK_PARTIAL();
3246               break;
3247               }
3248             if (fc == *eptr) break;
3249             eptr++;
3250             }
3251           if (possessive) continue;
3252           while (eptr >= pp)
3253             {
3254             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3255             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3256             eptr--;
3257             }
3258           }
3259
3260         RRETURN(MATCH_NOMATCH);
3261         }
3262       }
3263     /* Control never gets here */
3264
3265     /* Match a single character type repeatedly; several different opcodes
3266     share code. This is very similar to the code for single characters, but we
3267     repeat it in the interests of efficiency. */
3268
3269     case OP_TYPEEXACT:
3270     min = max = GET2(ecode, 1);
3271     minimize = TRUE;
3272     ecode += 3;
3273     goto REPEATTYPE;
3274
3275     case OP_TYPEUPTO:
3276     case OP_TYPEMINUPTO:
3277     min = 0;
3278     max = GET2(ecode, 1);
3279     minimize = *ecode == OP_TYPEMINUPTO;
3280     ecode += 3;
3281     goto REPEATTYPE;
3282
3283     case OP_TYPEPOSSTAR:
3284     possessive = TRUE;
3285     min = 0;
3286     max = INT_MAX;
3287     ecode++;
3288     goto REPEATTYPE;
3289
3290     case OP_TYPEPOSPLUS:
3291     possessive = TRUE;
3292     min = 1;
3293     max = INT_MAX;
3294     ecode++;
3295     goto REPEATTYPE;
3296
3297     case OP_TYPEPOSQUERY:
3298     possessive = TRUE;
3299     min = 0;
3300     max = 1;
3301     ecode++;
3302     goto REPEATTYPE;
3303
3304     case OP_TYPEPOSUPTO:
3305     possessive = TRUE;
3306     min = 0;
3307     max = GET2(ecode, 1);
3308     ecode += 3;
3309     goto REPEATTYPE;
3310
3311     case OP_TYPESTAR:
3312     case OP_TYPEMINSTAR:
3313     case OP_TYPEPLUS:
3314     case OP_TYPEMINPLUS:
3315     case OP_TYPEQUERY:
3316     case OP_TYPEMINQUERY:
3317     c = *ecode++ - OP_TYPESTAR;
3318     minimize = (c & 1) != 0;
3319     min = rep_min[c];                 /* Pick up values from tables; */
3320     max = rep_max[c];                 /* zero for max => infinity */
3321     if (max == 0) max = INT_MAX;
3322
3323     /* Common code for all repeated single character type matches. Note that
3324     in UTF-8 mode, '.' can match a multibyte character, as can types such as
3325     OP_HSPACE; OP_DIGIT, OP_WHITESPACE and OP_WORDCHAR match one byte only. */
3326
3327     REPEATTYPE:
3328     ctype = *ecode++;      /* Code for the character type */
3329
3330 #ifdef SUPPORT_UCP
3331     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3332       {
3333       prop_fail_result = ctype == OP_NOTPROP;
3334       prop_type = *ecode++;
3335       prop_value = *ecode++;
3336       }
3337     else prop_type = -1;
3338 #endif
3339
3340     /* First, ensure the minimum number of matches are present. Use inline
3341     code for maximizing the speed, and do the type test once at the start
3342     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3343     is tidier. Also separate the UCP code, which can be the same for both UTF-8
3344     and single-bytes. */
3345
3346     if (min > 0)
3347       {
3348 #ifdef SUPPORT_UCP
3349       if (prop_type >= 0)
3350         {
3351         switch(prop_type)
3352           {
3353           case PT_ANY:
3354           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3355           for (i = 1; i <= min; i++)
3356             {
3357             if (eptr >= md->end_subject)
3358               {
3359               SCHECK_PARTIAL();
3360               RRETURN(MATCH_NOMATCH);
3361               }
3362             GETCHARINCTEST(c, eptr);
3363             }
3364           break;
3365
3366           case PT_LAMP:
3367           for (i = 1; i <= min; i++)
3368             {
3369             if (eptr >= md->end_subject)
3370               {
3371               SCHECK_PARTIAL();
3372               RRETURN(MATCH_NOMATCH);
3373               }
3374             GETCHARINCTEST(c, eptr);
3375             prop_chartype = UCD_CHARTYPE(c);
3376             if ((prop_chartype == ucp_Lu ||
3377                  prop_chartype == ucp_Ll ||
3378                  prop_chartype == ucp_Lt) == prop_fail_result)
3379               RRETURN(MATCH_NOMATCH);
3380             }
3381           break;
3382
3383           case PT_GC:
3384           for (i = 1; i <= min; i++)
3385             {
3386             if (eptr >= md->end_subject)
3387               {
3388               SCHECK_PARTIAL();
3389               RRETURN(MATCH_NOMATCH);
3390               }
3391             GETCHARINCTEST(c, eptr);
3392             prop_category = UCD_CATEGORY(c);
3393             if ((prop_category == prop_value) == prop_fail_result)
3394               RRETURN(MATCH_NOMATCH);
3395             }
3396           break;
3397
3398           case PT_PC:
3399           for (i = 1; i <= min; i++)
3400             {
3401             if (eptr >= md->end_subject)
3402               {
3403               SCHECK_PARTIAL();
3404               RRETURN(MATCH_NOMATCH);
3405               }
3406             GETCHARINCTEST(c, eptr);
3407             prop_chartype = UCD_CHARTYPE(c);
3408             if ((prop_chartype == prop_value) == prop_fail_result)
3409               RRETURN(MATCH_NOMATCH);
3410             }
3411           break;
3412
3413           case PT_SC:
3414           for (i = 1; i <= min; i++)
3415             {
3416             if (eptr >= md->end_subject)
3417               {
3418               SCHECK_PARTIAL();
3419               RRETURN(MATCH_NOMATCH);
3420               }
3421             GETCHARINCTEST(c, eptr);
3422             prop_script = UCD_SCRIPT(c);
3423             if ((prop_script == prop_value) == prop_fail_result)
3424               RRETURN(MATCH_NOMATCH);
3425             }
3426           break;
3427
3428           default:
3429           RRETURN(PCRE_ERROR_INTERNAL);
3430           }
3431         }
3432
3433       /* Match extended Unicode sequences. We will get here only if the
3434       support is in the binary; otherwise a compile-time error occurs. */
3435
3436       else if (ctype == OP_EXTUNI)
3437         {
3438         for (i = 1; i <= min; i++)
3439           {
3440           if (eptr >= md->end_subject)
3441             {
3442             SCHECK_PARTIAL();
3443             RRETURN(MATCH_NOMATCH);
3444             }
3445           GETCHARINCTEST(c, eptr);
3446           prop_category = UCD_CATEGORY(c);
3447           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3448           while (eptr < md->end_subject)
3449             {
3450             int len = 1;
3451             if (!utf8) c = *eptr;
3452               else { GETCHARLEN(c, eptr, len); }
3453             prop_category = UCD_CATEGORY(c);
3454             if (prop_category != ucp_M) break;
3455             eptr += len;
3456             }
3457           }
3458         }
3459
3460       else
3461 #endif     /* SUPPORT_UCP */
3462
3463 /* Handle all other cases when the coding is UTF-8 */
3464
3465 #ifdef SUPPORT_UTF8
3466       if (utf8) switch(ctype)
3467         {
3468         case OP_ANY:
3469         for (i = 1; i <= min; i++)
3470           {
3471           if (eptr >= md->end_subject)
3472             {
3473             SCHECK_PARTIAL();
3474             RRETURN(MATCH_NOMATCH);
3475             }
3476           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3477           eptr++;
3478           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3479           }
3480         break;
3481
3482         case OP_ALLANY:
3483         for (i = 1; i <= min; i++)
3484           {
3485           if (eptr >= md->end_subject)
3486             {
3487             SCHECK_PARTIAL();
3488             RRETURN(MATCH_NOMATCH);
3489             }
3490           eptr++;
3491           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3492           }
3493         break;
3494
3495         case OP_ANYBYTE:
3496         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3497         eptr += min;
3498         break;
3499
3500         case OP_ANYNL:
3501         for (i = 1; i <= min; i++)
3502           {
3503           if (eptr >= md->end_subject)
3504             {
3505             SCHECK_PARTIAL();
3506             RRETURN(MATCH_NOMATCH);
3507             }
3508           GETCHARINC(c, eptr);
3509           switch(c)
3510             {
3511             default: RRETURN(MATCH_NOMATCH);
3512             case 0x000d:
3513             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3514             break;
3515
3516             case 0x000a:
3517             break;
3518
3519             case 0x000b:
3520             case 0x000c:
3521             case 0x0085:
3522             case 0x2028:
3523             case 0x2029:
3524             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3525             break;
3526             }
3527           }
3528         break;
3529
3530         case OP_NOT_HSPACE:
3531         for (i = 1; i <= min; i++)
3532           {
3533           if (eptr >= md->end_subject)
3534             {
3535             SCHECK_PARTIAL();
3536             RRETURN(MATCH_NOMATCH);
3537             }
3538           GETCHARINC(c, eptr);
3539           switch(c)
3540             {
3541             default: break;
3542             case 0x09:      /* HT */
3543             case 0x20:      /* SPACE */
3544             case 0xa0:      /* NBSP */
3545             case 0x1680:    /* OGHAM SPACE MARK */
3546             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3547             case 0x2000:    /* EN QUAD */
3548             case 0x2001:    /* EM QUAD */
3549             case 0x2002:    /* EN SPACE */
3550             case 0x2003:    /* EM SPACE */
3551             case 0x2004:    /* THREE-PER-EM SPACE */
3552             case 0x2005:    /* FOUR-PER-EM SPACE */
3553             case 0x2006:    /* SIX-PER-EM SPACE */
3554             case 0x2007:    /* FIGURE SPACE */
3555             case 0x2008:    /* PUNCTUATION SPACE */
3556             case 0x2009:    /* THIN SPACE */
3557             case 0x200A:    /* HAIR SPACE */
3558             case 0x202f:    /* NARROW NO-BREAK SPACE */
3559             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3560             case 0x3000:    /* IDEOGRAPHIC SPACE */
3561             RRETURN(MATCH_NOMATCH);
3562             }
3563           }
3564         break;
3565
3566         case OP_HSPACE:
3567         for (i = 1; i <= min; i++)
3568           {
3569           if (eptr >= md->end_subject)
3570             {
3571             SCHECK_PARTIAL();
3572             RRETURN(MATCH_NOMATCH);
3573             }
3574           GETCHARINC(c, eptr);
3575           switch(c)
3576             {
3577             default: RRETURN(MATCH_NOMATCH);
3578             case 0x09:      /* HT */
3579             case 0x20:      /* SPACE */
3580             case 0xa0:      /* NBSP */
3581             case 0x1680:    /* OGHAM SPACE MARK */
3582             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3583             case 0x2000:    /* EN QUAD */
3584             case 0x2001:    /* EM QUAD */
3585             case 0x2002:    /* EN SPACE */
3586             case 0x2003:    /* EM SPACE */
3587             case 0x2004:    /* THREE-PER-EM SPACE */
3588             case 0x2005:    /* FOUR-PER-EM SPACE */
3589             case 0x2006:    /* SIX-PER-EM SPACE */
3590             case 0x2007:    /* FIGURE SPACE */
3591             case 0x2008:    /* PUNCTUATION SPACE */
3592             case 0x2009:    /* THIN SPACE */
3593             case 0x200A:    /* HAIR SPACE */
3594             case 0x202f:    /* NARROW NO-BREAK SPACE */
3595             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3596             case 0x3000:    /* IDEOGRAPHIC SPACE */
3597             break;
3598             }
3599           }
3600         break;
3601
3602         case OP_NOT_VSPACE:
3603         for (i = 1; i <= min; i++)
3604           {
3605           if (eptr >= md->end_subject)
3606             {
3607             SCHECK_PARTIAL();
3608             RRETURN(MATCH_NOMATCH);
3609             }
3610           GETCHARINC(c, eptr);
3611           switch(c)
3612             {
3613             default: break;
3614             case 0x0a:      /* LF */
3615             case 0x0b:      /* VT */
3616             case 0x0c:      /* FF */
3617             case 0x0d:      /* CR */
3618             case 0x85:      /* NEL */
3619             case 0x2028:    /* LINE SEPARATOR */
3620             case 0x2029:    /* PARAGRAPH SEPARATOR */
3621             RRETURN(MATCH_NOMATCH);
3622             }
3623           }
3624         break;
3625
3626         case OP_VSPACE:
3627         for (i = 1; i <= min; i++)
3628           {
3629           if (eptr >= md->end_subject)
3630             {
3631             SCHECK_PARTIAL();
3632             RRETURN(MATCH_NOMATCH);
3633             }
3634           GETCHARINC(c, eptr);
3635           switch(c)
3636             {
3637             default: RRETURN(MATCH_NOMATCH);
3638             case 0x0a:      /* LF */
3639             case 0x0b:      /* VT */
3640             case 0x0c:      /* FF */
3641             case 0x0d:      /* CR */
3642             case 0x85:      /* NEL */
3643             case 0x2028:    /* LINE SEPARATOR */
3644             case 0x2029:    /* PARAGRAPH SEPARATOR */
3645             break;
3646             }
3647           }
3648         break;
3649
3650         case OP_NOT_DIGIT:
3651         for (i = 1; i <= min; i++)
3652           {
3653           if (eptr >= md->end_subject)
3654             {
3655             SCHECK_PARTIAL();
3656             RRETURN(MATCH_NOMATCH);
3657             }
3658           GETCHARINC(c, eptr);
3659           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3660             RRETURN(MATCH_NOMATCH);
3661           }
3662         break;
3663
3664         case OP_DIGIT:
3665         for (i = 1; i <= min; i++)
3666           {
3667           if (eptr >= md->end_subject)
3668             {
3669             SCHECK_PARTIAL();
3670             RRETURN(MATCH_NOMATCH);
3671             }
3672           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3673             RRETURN(MATCH_NOMATCH);
3674           /* No need to skip more bytes - we know it's a 1-byte character */
3675           }
3676         break;
3677
3678         case OP_NOT_WHITESPACE:
3679         for (i = 1; i <= min; i++)
3680           {
3681           if (eptr >= md->end_subject)
3682             {
3683             SCHECK_PARTIAL();
3684             RRETURN(MATCH_NOMATCH);
3685             }
3686           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3687             RRETURN(MATCH_NOMATCH);
3688           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3689           }
3690         break;
3691
3692         case OP_WHITESPACE:
3693         for (i = 1; i <= min; i++)
3694           {
3695           if (eptr >= md->end_subject)
3696             {
3697             SCHECK_PARTIAL();
3698             RRETURN(MATCH_NOMATCH);
3699             }
3700           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3701             RRETURN(MATCH_NOMATCH);
3702           /* No need to skip more bytes - we know it's a 1-byte character */
3703           }
3704         break;
3705
3706         case OP_NOT_WORDCHAR:
3707         for (i = 1; i <= min; i++)
3708           {
3709           if (eptr >= md->end_subject)
3710             {
3711             SCHECK_PARTIAL();
3712             RRETURN(MATCH_NOMATCH);
3713             }
3714           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3715             RRETURN(MATCH_NOMATCH);
3716           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3717           }
3718         break;
3719
3720         case OP_WORDCHAR:
3721         for (i = 1; i <= min; i++)
3722           {
3723           if (eptr >= md->end_subject)
3724             {
3725             SCHECK_PARTIAL();
3726             RRETURN(MATCH_NOMATCH);
3727             }
3728           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3729             RRETURN(MATCH_NOMATCH);
3730           /* No need to skip more bytes - we know it's a 1-byte character */
3731           }
3732         break;
3733
3734         default:
3735         RRETURN(PCRE_ERROR_INTERNAL);
3736         }  /* End switch(ctype) */
3737
3738       else
3739 #endif     /* SUPPORT_UTF8 */
3740
3741       /* Code for the non-UTF-8 case for minimum matching of operators other
3742       than OP_PROP and OP_NOTPROP. */
3743
3744       switch(ctype)
3745         {
3746         case OP_ANY:
3747         for (i = 1; i <= min; i++)
3748           {
3749           if (eptr >= md->end_subject)
3750             {
3751             SCHECK_PARTIAL();
3752             RRETURN(MATCH_NOMATCH);
3753             }
3754           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3755           eptr++;
3756           }
3757         break;
3758
3759         case OP_ALLANY:
3760         if (eptr > md->end_subject - min)
3761           {
3762           SCHECK_PARTIAL();
3763           RRETURN(MATCH_NOMATCH);
3764           }
3765         eptr += min;
3766         break;
3767
3768         case OP_ANYBYTE:
3769         if (eptr > md->end_subject - min)
3770           {
3771           SCHECK_PARTIAL();
3772           RRETURN(MATCH_NOMATCH);
3773           }
3774         eptr += min;
3775         break;
3776
3777         case OP_ANYNL:
3778         for (i = 1; i <= min; i++)
3779           {
3780           if (eptr >= md->end_subject)
3781             {
3782             SCHECK_PARTIAL();
3783             RRETURN(MATCH_NOMATCH);
3784             }
3785           switch(*eptr++)
3786             {
3787             default: RRETURN(MATCH_NOMATCH);
3788             case 0x000d:
3789             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3790             break;
3791             case 0x000a:
3792             break;
3793
3794             case 0x000b:
3795             case 0x000c:
3796             case 0x0085:
3797             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3798             break;
3799             }
3800           }
3801         break;
3802
3803         case OP_NOT_HSPACE:
3804         for (i = 1; i <= min; i++)
3805           {
3806           if (eptr >= md->end_subject)
3807             {
3808             SCHECK_PARTIAL();
3809             RRETURN(MATCH_NOMATCH);
3810             }
3811           switch(*eptr++)
3812             {
3813             default: break;
3814             case 0x09:      /* HT */
3815             case 0x20:      /* SPACE */
3816             case 0xa0:      /* NBSP */
3817             RRETURN(MATCH_NOMATCH);
3818             }
3819           }
3820         break;
3821
3822         case OP_HSPACE:
3823         for (i = 1; i <= min; i++)
3824           {
3825           if (eptr >= md->end_subject)
3826             {
3827             SCHECK_PARTIAL();
3828             RRETURN(MATCH_NOMATCH);
3829             }
3830           switch(*eptr++)
3831             {
3832             default: RRETURN(MATCH_NOMATCH);
3833             case 0x09:      /* HT */
3834             case 0x20:      /* SPACE */
3835             case 0xa0:      /* NBSP */
3836             break;
3837             }
3838           }
3839         break;
3840
3841         case OP_NOT_VSPACE:
3842         for (i = 1; i <= min; i++)
3843           {
3844           if (eptr >= md->end_subject)
3845             {
3846             SCHECK_PARTIAL();
3847             RRETURN(MATCH_NOMATCH);
3848             }
3849           switch(*eptr++)
3850             {
3851             default: break;
3852             case 0x0a:      /* LF */
3853             case 0x0b:      /* VT */
3854             case 0x0c:      /* FF */
3855             case 0x0d:      /* CR */
3856             case 0x85:      /* NEL */
3857             RRETURN(MATCH_NOMATCH);
3858             }
3859           }
3860         break;
3861
3862         case OP_VSPACE:
3863         for (i = 1; i <= min; i++)
3864           {
3865           if (eptr >= md->end_subject)
3866             {
3867             SCHECK_PARTIAL();
3868             RRETURN(MATCH_NOMATCH);
3869             }
3870           switch(*eptr++)
3871             {
3872             default: RRETURN(MATCH_NOMATCH);
3873             case 0x0a:      /* LF */
3874             case 0x0b:      /* VT */
3875             case 0x0c:      /* FF */
3876             case 0x0d:      /* CR */
3877             case 0x85:      /* NEL */
3878             break;
3879             }
3880           }
3881         break;
3882
3883         case OP_NOT_DIGIT:
3884         for (i = 1; i <= min; i++)
3885           {
3886           if (eptr >= md->end_subject)
3887             {
3888             SCHECK_PARTIAL();
3889             RRETURN(MATCH_NOMATCH);
3890             }
3891           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3892           }
3893         break;
3894
3895         case OP_DIGIT:
3896         for (i = 1; i <= min; i++)
3897           {
3898           if (eptr >= md->end_subject)
3899             {
3900             SCHECK_PARTIAL();
3901             RRETURN(MATCH_NOMATCH);
3902             }
3903           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3904           }
3905         break;
3906
3907         case OP_NOT_WHITESPACE:
3908         for (i = 1; i <= min; i++)
3909           {
3910           if (eptr >= md->end_subject)
3911             {
3912             SCHECK_PARTIAL();
3913             RRETURN(MATCH_NOMATCH);
3914             }
3915           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3916           }
3917         break;
3918
3919         case OP_WHITESPACE:
3920         for (i = 1; i <= min; i++)
3921           {
3922           if (eptr >= md->end_subject)
3923             {
3924             SCHECK_PARTIAL();
3925             RRETURN(MATCH_NOMATCH);
3926             }
3927           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3928           }
3929         break;
3930
3931         case OP_NOT_WORDCHAR:
3932         for (i = 1; i <= min; i++)
3933           {
3934           if (eptr >= md->end_subject)
3935             {
3936             SCHECK_PARTIAL();
3937             RRETURN(MATCH_NOMATCH);
3938             }
3939           if ((md->ctypes[*eptr++] & ctype_word) != 0)
3940             RRETURN(MATCH_NOMATCH);
3941           }
3942         break;
3943
3944         case OP_WORDCHAR:
3945         for (i = 1; i <= min; i++)
3946           {
3947           if (eptr >= md->end_subject)
3948             {
3949             SCHECK_PARTIAL();
3950             RRETURN(MATCH_NOMATCH);
3951             }
3952           if ((md->ctypes[*eptr++] & ctype_word) == 0)
3953             RRETURN(MATCH_NOMATCH);
3954           }
3955         break;
3956
3957         default:
3958         RRETURN(PCRE_ERROR_INTERNAL);
3959         }
3960       }
3961
3962     /* If min = max, continue at the same level without recursing */
3963
3964     if (min == max) continue;
3965
3966     /* If minimizing, we have to test the rest of the pattern before each
3967     subsequent match. Again, separate the UTF-8 case for speed, and also
3968     separate the UCP cases. */
3969
3970     if (minimize)
3971       {
3972 #ifdef SUPPORT_UCP
3973       if (prop_type >= 0)
3974         {
3975         switch(prop_type)
3976           {
3977           case PT_ANY:
3978           for (fi = min;; fi++)
3979             {
3980             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3981             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3982             if (fi >= max) RRETURN(MATCH_NOMATCH);
3983             if (eptr >= md->end_subject)
3984               {
3985               SCHECK_PARTIAL();
3986               RRETURN(MATCH_NOMATCH);
3987               }
3988             GETCHARINC(c, eptr);
3989             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3990             }
3991           /* Control never gets here */
3992
3993           case PT_LAMP:
3994           for (fi = min;; fi++)
3995             {
3996             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3997             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3998             if (fi >= max) RRETURN(MATCH_NOMATCH);
3999             if (eptr >= md->end_subject)
4000               {
4001               SCHECK_PARTIAL();
4002               RRETURN(MATCH_NOMATCH);
4003               }
4004             GETCHARINC(c, eptr);
4005             prop_chartype = UCD_CHARTYPE(c);
4006             if ((prop_chartype == ucp_Lu ||
4007                  prop_chartype == ucp_Ll ||
4008                  prop_chartype == ucp_Lt) == prop_fail_result)
4009               RRETURN(MATCH_NOMATCH);
4010             }
4011           /* Control never gets here */
4012
4013           case PT_GC:
4014           for (fi = min;; fi++)
4015             {
4016             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4017             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4018             if (fi >= max) RRETURN(MATCH_NOMATCH);
4019             if (eptr >= md->end_subject)
4020               {
4021               SCHECK_PARTIAL();
4022               RRETURN(MATCH_NOMATCH);
4023               }
4024             GETCHARINC(c, eptr);
4025             prop_category = UCD_CATEGORY(c);
4026             if ((prop_category == prop_value) == prop_fail_result)
4027               RRETURN(MATCH_NOMATCH);
4028             }
4029           /* Control never gets here */
4030
4031           case PT_PC:
4032           for (fi = min;; fi++)
4033             {
4034             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4035             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4036             if (fi >= max) RRETURN(MATCH_NOMATCH);
4037             if (eptr >= md->end_subject)
4038               {
4039               SCHECK_PARTIAL();
4040               RRETURN(MATCH_NOMATCH);
4041               }
4042             GETCHARINC(c, eptr);
4043             prop_chartype = UCD_CHARTYPE(c);
4044             if ((prop_chartype == prop_value) == prop_fail_result)
4045               RRETURN(MATCH_NOMATCH);
4046             }
4047           /* Control never gets here */
4048
4049           case PT_SC:
4050           for (fi = min;; fi++)
4051             {
4052             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4053             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4054             if (fi >= max) RRETURN(MATCH_NOMATCH);
4055             if (eptr >= md->end_subject)
4056               {
4057               SCHECK_PARTIAL();
4058               RRETURN(MATCH_NOMATCH);
4059               }
4060             GETCHARINC(c, eptr);
4061             prop_script = UCD_SCRIPT(c);
4062             if ((prop_script == prop_value) == prop_fail_result)
4063               RRETURN(MATCH_NOMATCH);
4064             }
4065           /* Control never gets here */
4066
4067           default:
4068           RRETURN(PCRE_ERROR_INTERNAL);
4069           }
4070         }
4071
4072       /* Match extended Unicode sequences. We will get here only if the
4073       support is in the binary; otherwise a compile-time error occurs. */
4074
4075       else if (ctype == OP_EXTUNI)
4076         {
4077         for (fi = min;; fi++)
4078           {
4079           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4080           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4081           if (fi >= max) RRETURN(MATCH_NOMATCH);
4082           if (eptr >= md->end_subject)
4083             {
4084             SCHECK_PARTIAL();
4085             RRETURN(MATCH_NOMATCH);
4086             }
4087           GETCHARINCTEST(c, eptr);
4088           prop_category = UCD_CATEGORY(c);
4089           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
4090           while (eptr < md->end_subject)
4091             {
4092             int len = 1;
4093             if (!utf8) c = *eptr;
4094               else { GETCHARLEN(c, eptr, len); }
4095             prop_category = UCD_CATEGORY(c);
4096             if (prop_category != ucp_M) break;
4097             eptr += len;
4098             }
4099           }
4100         }
4101
4102       else
4103 #endif     /* SUPPORT_UCP */
4104
4105 #ifdef SUPPORT_UTF8
4106       /* UTF-8 mode */
4107       if (utf8)
4108         {
4109         for (fi = min;; fi++)
4110           {
4111           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4112           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4113           if (fi >= max) RRETURN(MATCH_NOMATCH);
4114           if (eptr >= md->end_subject)
4115             {
4116             SCHECK_PARTIAL();
4117             RRETURN(MATCH_NOMATCH);
4118             }
4119           if (ctype == OP_ANY && IS_NEWLINE(eptr))
4120             RRETURN(MATCH_NOMATCH);
4121           GETCHARINC(c, eptr);
4122           switch(ctype)
4123             {
4124             case OP_ANY:        /* This is the non-NL case */
4125             case OP_ALLANY:
4126             case OP_ANYBYTE:
4127             break;
4128
4129             case OP_ANYNL:
4130             switch(c)
4131               {
4132               default: RRETURN(MATCH_NOMATCH);
4133               case 0x000d:
4134               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4135               break;
4136               case 0x000a:
4137               break;
4138
4139               case 0x000b:
4140               case 0x000c:
4141               case 0x0085:
4142               case 0x2028:
4143               case 0x2029:
4144               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4145               break;
4146               }
4147             break;
4148
4149             case OP_NOT_HSPACE:
4150             switch(c)
4151               {
4152               default: break;
4153               case 0x09:      /* HT */
4154               case 0x20:      /* SPACE */
4155               case 0xa0:      /* NBSP */
4156               case 0x1680:    /* OGHAM SPACE MARK */
4157               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4158               case 0x2000:    /* EN QUAD */
4159               case 0x2001:    /* EM QUAD */
4160               case 0x2002:    /* EN SPACE */
4161               case 0x2003:    /* EM SPACE */
4162               case 0x2004:    /* THREE-PER-EM SPACE */
4163               case 0x2005:    /* FOUR-PER-EM SPACE */
4164               case 0x2006:    /* SIX-PER-EM SPACE */
4165               case 0x2007:    /* FIGURE SPACE */
4166               case 0x2008:    /* PUNCTUATION SPACE */
4167               case 0x2009:    /* THIN SPACE */
4168               case 0x200A:    /* HAIR SPACE */
4169               case 0x202f:    /* NARROW NO-BREAK SPACE */
4170               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4171               case 0x3000:    /* IDEOGRAPHIC SPACE */
4172               RRETURN(MATCH_NOMATCH);
4173               }
4174             break;
4175
4176             case OP_HSPACE:
4177             switch(c)
4178               {
4179               default: RRETURN(MATCH_NOMATCH);
4180               case 0x09:      /* HT */
4181               case 0x20:      /* SPACE */
4182               case 0xa0:      /* NBSP */
4183               case 0x1680:    /* OGHAM SPACE MARK */
4184               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4185               case 0x2000:    /* EN QUAD */
4186               case 0x2001:    /* EM QUAD */
4187               case 0x2002:    /* EN SPACE */
4188               case 0x2003:    /* EM SPACE */
4189               case 0x2004:    /* THREE-PER-EM SPACE */
4190               case 0x2005:    /* FOUR-PER-EM SPACE */
4191               case 0x2006:    /* SIX-PER-EM SPACE */
4192               case 0x2007:    /* FIGURE SPACE */
4193               case 0x2008:    /* PUNCTUATION SPACE */
4194               case 0x2009:    /* THIN SPACE */
4195               case 0x200A:    /* HAIR SPACE */
4196               case 0x202f:    /* NARROW NO-BREAK SPACE */
4197               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4198               case 0x3000:    /* IDEOGRAPHIC SPACE */
4199               break;
4200               }
4201             break;
4202
4203             case OP_NOT_VSPACE:
4204             switch(c)
4205               {
4206               default: break;
4207               case 0x0a:      /* LF */
4208               case 0x0b:      /* VT */
4209               case 0x0c:      /* FF */
4210               case 0x0d:      /* CR */
4211               case 0x85:      /* NEL */
4212               case 0x2028:    /* LINE SEPARATOR */
4213               case 0x2029:    /* PARAGRAPH SEPARATOR */
4214               RRETURN(MATCH_NOMATCH);
4215               }
4216             break;
4217
4218             case OP_VSPACE:
4219             switch(c)
4220               {
4221               default: RRETURN(MATCH_NOMATCH);
4222               case 0x0a:      /* LF */
4223               case 0x0b:      /* VT */
4224               case 0x0c:      /* FF */
4225               case 0x0d:      /* CR */
4226               case 0x85:      /* NEL */
4227               case 0x2028:    /* LINE SEPARATOR */
4228               case 0x2029:    /* PARAGRAPH SEPARATOR */
4229               break;
4230               }
4231             break;
4232
4233             case OP_NOT_DIGIT:
4234             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4235               RRETURN(MATCH_NOMATCH);
4236             break;
4237
4238             case OP_DIGIT:
4239             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4240               RRETURN(MATCH_NOMATCH);
4241             break;
4242
4243             case OP_NOT_WHITESPACE:
4244             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4245               RRETURN(MATCH_NOMATCH);
4246             break;
4247
4248             case OP_WHITESPACE:
4249             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4250               RRETURN(MATCH_NOMATCH);
4251             break;
4252
4253             case OP_NOT_WORDCHAR:
4254             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4255               RRETURN(MATCH_NOMATCH);
4256             break;
4257
4258             case OP_WORDCHAR:
4259             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4260               RRETURN(MATCH_NOMATCH);
4261             break;
4262
4263             default:
4264             RRETURN(PCRE_ERROR_INTERNAL);
4265             }
4266           }
4267         }
4268       else
4269 #endif
4270       /* Not UTF-8 mode */
4271         {
4272         for (fi = min;; fi++)
4273           {
4274           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4275           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4276           if (fi >= max) RRETURN(MATCH_NOMATCH);
4277           if (eptr >= md->end_subject)
4278             {
4279             SCHECK_PARTIAL();
4280             RRETURN(MATCH_NOMATCH);
4281             }
4282           if (ctype == OP_ANY && IS_NEWLINE(eptr))
4283             RRETURN(MATCH_NOMATCH);
4284           c = *eptr++;
4285           switch(ctype)
4286             {
4287             case OP_ANY:     /* This is the non-NL case */
4288             case OP_ALLANY:
4289             case OP_ANYBYTE:
4290             break;
4291
4292             case OP_ANYNL:
4293             switch(c)
4294               {
4295               default: RRETURN(MATCH_NOMATCH);
4296               case 0x000d:
4297               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4298               break;
4299
4300               case 0x000a:
4301               break;
4302
4303               case 0x000b:
4304               case 0x000c:
4305               case 0x0085:
4306               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4307               break;
4308               }
4309             break;
4310
4311             case OP_NOT_HSPACE:
4312             switch(c)
4313               {
4314               default: break;
4315               case 0x09:      /* HT */
4316               case 0x20:      /* SPACE */
4317               case 0xa0:      /* NBSP */
4318               RRETURN(MATCH_NOMATCH);
4319               }
4320             break;
4321
4322             case OP_HSPACE:
4323             switch(c)
4324               {
4325               default: RRETURN(MATCH_NOMATCH);
4326               case 0x09:      /* HT */
4327               case 0x20:      /* SPACE */
4328               case 0xa0:      /* NBSP */
4329               break;
4330               }
4331             break;
4332
4333             case OP_NOT_VSPACE:
4334             switch(c)
4335               {
4336               default: break;
4337               case 0x0a:      /* LF */
4338               case 0x0b:      /* VT */
4339               case 0x0c:      /* FF */
4340               case 0x0d:      /* CR */
4341               case 0x85:      /* NEL */
4342               RRETURN(MATCH_NOMATCH);
4343               }
4344             break;
4345
4346             case OP_VSPACE:
4347             switch(c)
4348               {
4349               default: RRETURN(MATCH_NOMATCH);
4350               case 0x0a:      /* LF */
4351               case 0x0b:      /* VT */
4352               case 0x0c:      /* FF */
4353               case 0x0d:      /* CR */
4354               case 0x85:      /* NEL */
4355               break;
4356               }
4357             break;
4358
4359             case OP_NOT_DIGIT:
4360             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
4361             break;
4362
4363             case OP_DIGIT:
4364             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
4365             break;
4366
4367             case OP_NOT_WHITESPACE:
4368             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
4369             break;
4370
4371             case OP_WHITESPACE:
4372             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
4373             break;
4374
4375             case OP_NOT_WORDCHAR:
4376             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
4377             break;
4378
4379             case OP_WORDCHAR:
4380             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
4381             break;
4382
4383             default:
4384             RRETURN(PCRE_ERROR_INTERNAL);
4385             }
4386           }
4387         }
4388       /* Control never gets here */
4389       }
4390
4391     /* If maximizing, it is worth using inline code for speed, doing the type
4392     test once at the start (i.e. keep it out of the loop). Again, keep the
4393     UTF-8 and UCP stuff separate. */
4394
4395     else
4396       {
4397       pp = eptr;  /* Remember where we started */
4398
4399 #ifdef SUPPORT_UCP
4400       if (prop_type >= 0)
4401         {
4402         switch(prop_type)
4403           {
4404           case PT_ANY:
4405           for (i = min; i < max; i++)
4406             {
4407             int len = 1;
4408             if (eptr >= md->end_subject)
4409               {
4410               SCHECK_PARTIAL();
4411               break;
4412               }
4413             GETCHARLEN(c, eptr, len);
4414             if (prop_fail_result) break;
4415             eptr+= len;
4416             }
4417           break;
4418
4419           case PT_LAMP:
4420           for (i = min; i < max; i++)
4421             {
4422             int len = 1;
4423             if (eptr >= md->end_subject)
4424               {
4425               SCHECK_PARTIAL();
4426               break;
4427               }
4428             GETCHARLEN(c, eptr, len);
4429             prop_chartype = UCD_CHARTYPE(c);
4430             if ((prop_chartype == ucp_Lu ||
4431                  prop_chartype == ucp_Ll ||
4432                  prop_chartype == ucp_Lt) == prop_fail_result)
4433               break;
4434             eptr+= len;
4435             }
4436           break;
4437
4438           case PT_GC:
4439           for (i = min; i < max; i++)
4440             {
4441             int len = 1;
4442             if (eptr >= md->end_subject)
4443               {
4444               SCHECK_PARTIAL();
4445               break;
4446               }
4447             GETCHARLEN(c, eptr, len);
4448             prop_category = UCD_CATEGORY(c);
4449             if ((prop_category == prop_value) == prop_fail_result)
4450               break;
4451             eptr+= len;
4452             }
4453           break;
4454
4455           case PT_PC:
4456           for (i = min; i < max; i++)
4457             {
4458             int len = 1;
4459             if (eptr >= md->end_subject)
4460               {
4461               SCHECK_PARTIAL();
4462               break;
4463               }
4464             GETCHARLEN(c, eptr, len);
4465             prop_chartype = UCD_CHARTYPE(c);
4466             if ((prop_chartype == prop_value) == prop_fail_result)
4467               break;
4468             eptr+= len;
4469             }
4470           break;
4471
4472           case PT_SC:
4473           for (i = min; i < max; i++)
4474             {
4475             int len = 1;
4476             if (eptr >= md->end_subject)
4477               {
4478               SCHECK_PARTIAL();
4479               break;
4480               }
4481             GETCHARLEN(c, eptr, len);
4482             prop_script = UCD_SCRIPT(c);
4483             if ((prop_script == prop_value) == prop_fail_result)
4484               break;
4485             eptr+= len;
4486             }
4487           break;
4488           }
4489
4490         /* eptr is now past the end of the maximum run */
4491
4492         if (possessive) continue;
4493         for(;;)
4494           {
4495           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4496           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4497           if (eptr-- == pp) break;        /* Stop if tried at original pos */
4498           if (utf8) BACKCHAR(eptr);
4499           }
4500         }
4501
4502       /* Match extended Unicode sequences. We will get here only if the
4503       support is in the binary; otherwise a compile-time error occurs. */
4504
4505       else if (ctype == OP_EXTUNI)
4506         {
4507         for (i = min; i < max; i++)
4508           {
4509           if (eptr >= md->end_subject)
4510             {
4511             SCHECK_PARTIAL();
4512             break;
4513             }
4514           GETCHARINCTEST(c, eptr);
4515           prop_category = UCD_CATEGORY(c);
4516           if (prop_category == ucp_M) break;
4517           while (eptr < md->end_subject)
4518             {
4519             int len = 1;
4520             if (!utf8) c = *eptr; else
4521               {
4522               GETCHARLEN(c, eptr, len);
4523               }
4524             prop_category = UCD_CATEGORY(c);
4525             if (prop_category != ucp_M) break;
4526             eptr += len;
4527             }
4528           }
4529
4530         /* eptr is now past the end of the maximum run */
4531
4532         if (possessive) continue;
4533
4534         for(;;)
4535           {
4536           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4537           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4538           if (eptr-- == pp) break;        /* Stop if tried at original pos */
4539           for (;;)                        /* Move back over one extended */
4540             {
4541             int len = 1;
4542             if (!utf8) c = *eptr; else
4543               {
4544               BACKCHAR(eptr);
4545               GETCHARLEN(c, eptr, len);
4546               }
4547             prop_category = UCD_CATEGORY(c);
4548             if (prop_category != ucp_M) break;
4549             eptr--;
4550             }
4551           }
4552         }
4553
4554       else
4555 #endif   /* SUPPORT_UCP */
4556
4557 #ifdef SUPPORT_UTF8
4558       /* UTF-8 mode */
4559
4560       if (utf8)
4561         {
4562         switch(ctype)
4563           {
4564           case OP_ANY:
4565           if (max < INT_MAX)
4566             {
4567             for (i = min; i < max; i++)
4568               {
4569               if (eptr >= md->end_subject)
4570                 {
4571                 SCHECK_PARTIAL();
4572                 break;
4573                 }
4574               if (IS_NEWLINE(eptr)) break;
4575               eptr++;
4576               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4577               }
4578             }
4579
4580           /* Handle unlimited UTF-8 repeat */
4581
4582           else
4583             {
4584             for (i = min; i < max; i++)
4585               {
4586               if (eptr >= md->end_subject)
4587                 {
4588                 SCHECK_PARTIAL();
4589                 break;
4590                 }
4591               if (IS_NEWLINE(eptr)) break;
4592               eptr++;
4593               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4594               }
4595             }
4596           break;
4597
4598           case OP_ALLANY:
4599           if (max < INT_MAX)
4600             {
4601             for (i = min; i < max; i++)
4602               {
4603               if (eptr >= md->end_subject)
4604                 {
4605                 SCHECK_PARTIAL();
4606                 break;
4607                 }
4608               eptr++;
4609               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4610               }
4611             }
4612           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
4613           break;
4614
4615           /* The byte case is the same as non-UTF8 */
4616
4617           case OP_ANYBYTE:
4618           c = max - min;
4619           if (c > (unsigned int)(md->end_subject - eptr))
4620             {
4621             eptr = md->end_subject;
4622             SCHECK_PARTIAL();
4623             }
4624           else eptr += c;
4625           break;
4626
4627           case OP_ANYNL:
4628           for (i = min; i < max; i++)
4629             {
4630             int len = 1;
4631             if (eptr >= md->end_subject)
4632               {
4633               SCHECK_PARTIAL();
4634               break;
4635               }
4636             GETCHARLEN(c, eptr, len);
4637             if (c == 0x000d)
4638               {
4639               if (++eptr >= md->end_subject) break;
4640               if (*eptr == 0x000a) eptr++;
4641               }
4642             else
4643               {
4644               if (c != 0x000a &&
4645                   (md->bsr_anycrlf ||
4646                    (c != 0x000b && c != 0x000c &&
4647                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
4648                 break;
4649               eptr += len;
4650               }
4651             }
4652           break;
4653
4654           case OP_NOT_HSPACE:
4655           case OP_HSPACE:
4656           for (i = min; i < max; i++)
4657             {
4658             BOOL gotspace;
4659             int len = 1;
4660             if (eptr >= md->end_subject)
4661               {
4662               SCHECK_PARTIAL();
4663               break;
4664               }
4665             GETCHARLEN(c, eptr, len);
4666             switch(c)
4667               {
4668               default: gotspace = FALSE; break;
4669               case 0x09:      /* HT */
4670               case 0x20:      /* SPACE */
4671               case 0xa0:      /* NBSP */
4672               case 0x1680:    /* OGHAM SPACE MARK */
4673               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4674               case 0x2000:    /* EN QUAD */
4675               case 0x2001:    /* EM QUAD */
4676               case 0x2002:    /* EN SPACE */
4677               case 0x2003:    /* EM SPACE */
4678               case 0x2004:    /* THREE-PER-EM SPACE */
4679               case 0x2005:    /* FOUR-PER-EM SPACE */
4680               case 0x2006:    /* SIX-PER-EM SPACE */
4681               case 0x2007:    /* FIGURE SPACE */
4682               case 0x2008:    /* PUNCTUATION SPACE */
4683               case 0x2009:    /* THIN SPACE */
4684               case 0x200A:    /* HAIR SPACE */
4685               case 0x202f:    /* NARROW NO-BREAK SPACE */
4686               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4687               case 0x3000:    /* IDEOGRAPHIC SPACE */
4688               gotspace = TRUE;
4689               break;
4690               }
4691             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
4692             eptr += len;
4693             }
4694           break;
4695
4696           case OP_NOT_VSPACE:
4697           case OP_VSPACE:
4698           for (i = min; i < max; i++)
4699             {
4700             BOOL gotspace;
4701             int len = 1;
4702             if (eptr >= md->end_subject)
4703               {
4704               SCHECK_PARTIAL();
4705               break;
4706               }
4707             GETCHARLEN(c, eptr, len);
4708             switch(c)
4709               {
4710               default: gotspace = FALSE; break;
4711               case 0x0a:      /* LF */
4712               case 0x0b:      /* VT */
4713               case 0x0c:      /* FF */
4714               case 0x0d:      /* CR */
4715               case 0x85:      /* NEL */
4716               case 0x2028:    /* LINE SEPARATOR */
4717               case 0x2029:    /* PARAGRAPH SEPARATOR */
4718               gotspace = TRUE;
4719               break;
4720               }
4721             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
4722             eptr += len;
4723             }
4724           break;
4725
4726           case OP_NOT_DIGIT:
4727           for (i = min; i < max; i++)
4728             {
4729             int len = 1;
4730             if (eptr >= md->end_subject)
4731               {
4732               SCHECK_PARTIAL();
4733               break;
4734               }
4735             GETCHARLEN(c, eptr, len);
4736             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
4737             eptr+= len;
4738             }
4739           break;
4740
4741           case OP_DIGIT:
4742           for (i = min; i < max; i++)
4743             {
4744             int len = 1;
4745             if (eptr >= md->end_subject)
4746               {
4747               SCHECK_PARTIAL();
4748               break;
4749               }
4750             GETCHARLEN(c, eptr, len);
4751             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
4752             eptr+= len;
4753             }
4754           break;
4755
4756           case OP_NOT_WHITESPACE:
4757           for (i = min; i < max; i++)
4758             {
4759             int len = 1;
4760             if (eptr >= md->end_subject)
4761               {
4762               SCHECK_PARTIAL();
4763               break;
4764               }
4765             GETCHARLEN(c, eptr, len);
4766             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4767             eptr+= len;
4768             }
4769           break;
4770
4771           case OP_WHITESPACE:
4772           for (i = min; i < max; i++)
4773             {
4774             int len = 1;
4775             if (eptr >= md->end_subject)
4776               {
4777               SCHECK_PARTIAL();
4778               break;
4779               }
4780             GETCHARLEN(c, eptr, len);
4781             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
4782             eptr+= len;
4783             }
4784           break;
4785
4786           case OP_NOT_WORDCHAR:
4787           for (i = min; i < max; i++)
4788             {
4789             int len = 1;
4790             if (eptr >= md->end_subject)
4791               {
4792               SCHECK_PARTIAL();
4793               break;
4794               }
4795             GETCHARLEN(c, eptr, len);
4796             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4797             eptr+= len;
4798             }
4799           break;
4800
4801           case OP_WORDCHAR:
4802           for (i = min; i < max; i++)
4803             {
4804             int len = 1;
4805             if (eptr >= md->end_subject)
4806               {
4807               SCHECK_PARTIAL();
4808               break;
4809               }
4810             GETCHARLEN(c, eptr, len);
4811             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
4812             eptr+= len;
4813             }
4814           break;
4815
4816           default:
4817           RRETURN(PCRE_ERROR_INTERNAL);
4818           }
4819
4820         /* eptr is now past the end of the maximum run */
4821
4822         if (possessive) continue;
4823         for(;;)
4824           {
4825           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4826           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4827           if (eptr-- == pp) break;        /* Stop if tried at original pos */
4828           BACKCHAR(eptr);
4829           }
4830         }
4831       else
4832 #endif  /* SUPPORT_UTF8 */
4833
4834       /* Not UTF-8 mode */
4835         {
4836         switch(ctype)
4837           {
4838           case OP_ANY:
4839           for (i = min; i < max; i++)
4840             {
4841             if (eptr >= md->end_subject)
4842               {
4843               SCHECK_PARTIAL();
4844               break;
4845               }
4846             if (IS_NEWLINE(eptr)) break;
4847             eptr++;
4848             }
4849           break;
4850
4851           case OP_ALLANY:
4852           case OP_ANYBYTE:
4853           c = max - min;
4854           if (c > (unsigned int)(md->end_subject - eptr))
4855             {
4856             eptr = md->end_subject;
4857             SCHECK_PARTIAL();
4858             }
4859           else eptr += c;
4860           break;
4861
4862           case OP_ANYNL:
4863           for (i = min; i < max; i++)
4864             {
4865             if (eptr >= md->end_subject)
4866               {
4867               SCHECK_PARTIAL();
4868               break;
4869               }
4870             c = *eptr;
4871             if (c == 0x000d)
4872               {
4873               if (++eptr >= md->end_subject) break;
4874               if (*eptr == 0x000a) eptr++;
4875               }
4876             else
4877               {
4878               if (c != 0x000a &&
4879                   (md->bsr_anycrlf ||
4880                     (c != 0x000b && c != 0x000c && c != 0x0085)))
4881                 break;
4882               eptr++;
4883               }
4884             }
4885           break;
4886
4887           case OP_NOT_HSPACE:
4888           for (i = min; i < max; i++)
4889             {
4890             if (eptr >= md->end_subject)
4891               {
4892               SCHECK_PARTIAL();
4893               break;
4894               }
4895             c = *eptr;
4896             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4897             eptr++;
4898             }
4899           break;
4900
4901           case OP_HSPACE:
4902           for (i = min; i < max; i++)
4903             {
4904             if (eptr >= md->end_subject)
4905               {
4906               SCHECK_PARTIAL();
4907               break;
4908               }
4909             c = *eptr;
4910             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4911             eptr++;
4912             }
4913           break;
4914
4915           case OP_NOT_VSPACE:
4916           for (i = min; i < max; i++)
4917             {
4918             if (eptr >= md->end_subject)
4919               {
4920               SCHECK_PARTIAL();
4921               break;
4922               }
4923             c = *eptr;
4924             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4925               break;
4926             eptr++;
4927             }
4928           break;
4929
4930           case OP_VSPACE:
4931           for (i = min; i < max; i++)
4932             {
4933             if (eptr >= md->end_subject)
4934               {
4935               SCHECK_PARTIAL();
4936               break;
4937               }
4938             c = *eptr;
4939             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4940               break;
4941             eptr++;
4942             }
4943           break;
4944
4945           case OP_NOT_DIGIT:
4946           for (i = min; i < max; i++)
4947             {
4948             if (eptr >= md->end_subject)
4949               {
4950               SCHECK_PARTIAL();
4951               break;
4952               }
4953             if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
4954             eptr++;
4955             }
4956           break;
4957
4958           case OP_DIGIT:
4959           for (i = min; i < max; i++)
4960             {
4961             if (eptr >= md->end_subject)
4962               {
4963               SCHECK_PARTIAL();
4964               break;
4965               }
4966             if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
4967             eptr++;
4968             }
4969           break;
4970
4971           case OP_NOT_WHITESPACE:
4972           for (i = min; i < max; i++)
4973             {
4974             if (eptr >= md->end_subject)
4975               {
4976               SCHECK_PARTIAL();
4977               break;
4978               }
4979             if ((md->ctypes[*eptr] & ctype_space) != 0) break;
4980             eptr++;
4981             }
4982           break;
4983
4984           case OP_WHITESPACE:
4985           for (i = min; i < max; i++)
4986             {
4987             if (eptr >= md->end_subject)
4988               {
4989               SCHECK_PARTIAL();
4990               break;
4991               }
4992             if ((md->ctypes[*eptr] & ctype_space) == 0) break;
4993             eptr++;
4994             }
4995           break;
4996
4997           case OP_NOT_WORDCHAR:
4998           for (i = min; i < max; i++)
4999             {
5000             if (eptr >= md->end_subject)
5001               {
5002               SCHECK_PARTIAL();
5003               break;
5004               }
5005             if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5006             eptr++;
5007             }
5008           break;
5009
5010           case OP_WORDCHAR:
5011           for (i = min; i < max; i++)
5012             {
5013             if (eptr >= md->end_subject)
5014               {
5015               SCHECK_PARTIAL();
5016               break;
5017               }
5018             if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5019             eptr++;
5020             }
5021           break;
5022
5023           default:
5024           RRETURN(PCRE_ERROR_INTERNAL);
5025           }
5026
5027         /* eptr is now past the end of the maximum run */
5028
5029         if (possessive) continue;
5030         while (eptr >= pp)
5031           {
5032           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
5033           eptr--;
5034           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5035           }
5036         }
5037
5038       /* Get here if we can't make it match with any permitted repetitions */
5039
5040       RRETURN(MATCH_NOMATCH);
5041       }
5042     /* Control never gets here */
5043
5044     /* There's been some horrible disaster. Arrival here can only mean there is
5045     something seriously wrong in the code above or the OP_xxx definitions. */
5046
5047     default:
5048     DPRINTF(("Unknown opcode %d\n", *ecode));
5049     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
5050     }
5051
5052   /* Do not stick any code in here without much thought; it is assumed
5053   that "continue" in the code above comes out to here to repeat the main
5054   loop. */
5055
5056   }             /* End of main loop */
5057 /* Control never reaches here */
5058
5059
5060 /* When compiling to use the heap rather than the stack for recursive calls to
5061 match(), the RRETURN() macro jumps here. The number that is saved in
5062 frame->Xwhere indicates which label we actually want to return to. */
5063
5064 #ifdef NO_RECURSE
5065 #define LBL(val) case val: goto L_RM##val;
5066 HEAP_RETURN:
5067 switch (frame->Xwhere)
5068   {
5069   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5070   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5071   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5072   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5073   LBL(53) LBL(54)
5074 #ifdef SUPPORT_UTF8
5075   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5076   LBL(32) LBL(34) LBL(42) LBL(46)
5077 #ifdef SUPPORT_UCP
5078   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5079 #endif  /* SUPPORT_UCP */
5080 #endif  /* SUPPORT_UTF8 */
5081   default:
5082   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5083   return PCRE_ERROR_INTERNAL;
5084   }
5085 #undef LBL
5086 #endif  /* NO_RECURSE */
5087 }
5088
5089
5090 /***************************************************************************
5091 ****************************************************************************
5092                    RECURSION IN THE match() FUNCTION
5093
5094 Undefine all the macros that were defined above to handle this. */
5095
5096 #ifdef NO_RECURSE  /* build that uses the heap, not the stack, for match() recursion */
5097 #undef eptr  /* NOTE(review): eptr..flags look like the match() argument names - confirm against the #defines */
5098 #undef ecode
5099 #undef mstart
5100 #undef offset_top
5101 #undef ims  /* pcre_exec() below declares an ordinary local called ims */
5102 #undef eptrb
5103 #undef flags  /* pcre_exec() below declares an ordinary local called flags */
5104
5105 #undef callpat
5106 #undef charptr
5107 #undef data
5108 #undef next
5109 #undef pp  /* pcre_exec() below declares an ordinary local called pp */
5110 #undef prev
5111 #undef saved_eptr
5112
5113 #undef new_recursive
5114
5115 #undef cur_is_word
5116 #undef condition
5117 #undef prev_is_word
5118
5119 #undef original_ims
5120
5121 #undef ctype
5122 #undef length  /* pcre_exec() below has a parameter called length */
5123 #undef max
5124 #undef min
5125 #undef number
5126 #undef offset
5127 #undef op
5128 #undef save_capture_last
5129 #undef save_offset1
5130 #undef save_offset2
5131 #undef save_offset3
5132 #undef stacksave
5133
5134 #undef newptrb
5135
5136 #endif  /* NO_RECURSE */
5137
5138 /* These two are defined as macros in both cases */
5139
5140 #undef fc
5141 #undef fi
5142
5143 /***************************************************************************
5144 ***************************************************************************/
5145
5146
5147
5148 /*************************************************
5149 *         Execute a Regular Expression           *
5150 *************************************************/
5151
5152 /* This function applies a compiled re to a subject string and picks out
5153 portions of the string if it matches. Two elements in the vector are set for
5154 each substring: the offsets to the start and end of the substring.
5155
5156 Arguments:
5157   argument_re     points to the compiled expression
5158   extra_data      points to extra data or is NULL
5159   subject         points to the subject string
5160   length          length of subject string (may contain binary zeros)
5161   start_offset    where to start in the subject string
5162   options         option bits
5163   offsets         points to a vector of ints to be filled in with offsets
5164   offsetcount     the number of elements in the vector
5165
5166 Returns:          > 0 => success; value is the number of elements filled in
5167                   = 0 => success, but offsets is not big enough
5168                    -1 => failed to match
5169                  < -1 => some kind of unexpected problem
5170 */
5171
5172 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5173 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5174   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5175   int offsetcount)
5176 {
5177 int rc, resetcount, ocount;
5178 int first_byte = -1;
5179 int req_byte = -1;
5180 int req_byte2 = -1;
5181 int newline;
5182 unsigned long int ims;
5183 BOOL using_temporary_offsets = FALSE;
5184 BOOL anchored;
5185 BOOL startline;
5186 BOOL firstline;
5187 BOOL first_byte_caseless = FALSE;
5188 BOOL req_byte_caseless = FALSE;
5189 BOOL utf8;
5190 match_data match_block;
5191 match_data *md = &match_block;
5192 const uschar *tables;
5193 const uschar *start_bits = NULL;
5194 USPTR start_match = (USPTR)subject + start_offset;
5195 USPTR end_subject;
5196 USPTR start_partial = NULL;
5197 USPTR req_byte_ptr = start_match - 1;
5198
5199 pcre_study_data internal_study;
5200 const pcre_study_data *study;
5201
5202 real_pcre internal_re;
5203 const real_pcre *external_re = (const real_pcre *)argument_re;
5204 const real_pcre *re = external_re;
5205
5206 /* Plausibility checks */
5207
5208 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5209 if (re == NULL || subject == NULL ||
5210    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5211 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5212
5213 /* This information is for finding all the numbers associated with a given
5214 name, for condition testing. */
5215
5216 md->name_table = (uschar *)re + re->name_table_offset;
5217 md->name_count = re->name_count;
5218 md->name_entry_size = re->name_entry_size;
5219
5220 /* Fish out the optional data from the extra_data structure, first setting
5221 the default values. */
5222
5223 study = NULL;
5224 md->match_limit = MATCH_LIMIT;
5225 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5226 md->callout_data = NULL;
5227
5228 /* The table pointer is always in native byte order. */
5229
5230 tables = external_re->tables;
5231
5232 if (extra_data != NULL)
5233   {
5234   register unsigned int flags = extra_data->flags;
5235   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5236     study = (const pcre_study_data *)extra_data->study_data;
5237   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5238     md->match_limit = extra_data->match_limit;
5239   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5240     md->match_limit_recursion = extra_data->match_limit_recursion;
5241   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5242     md->callout_data = extra_data->callout_data;
5243   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5244   }
5245
5246 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5247 is a feature that makes it possible to save compiled regexes and re-use them
5248 in other programs later. */
5249
5250 if (tables == NULL) tables = _pcre_default_tables;
5251
5252 /* Check that the first field in the block is the magic number. If it is not,
5253 test for a regex that was compiled on a host of opposite endianness. If this is
5254 the case, flipped values are put in internal_re and internal_study if there was
5255 study data too. */
5256
5257 if (re->magic_number != MAGIC_NUMBER)
5258   {
5259   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5260   if (re == NULL) return PCRE_ERROR_BADMAGIC;
5261   if (study != NULL) study = &internal_study;
5262   }
5263
5264 /* Set up other data */
5265
5266 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5267 startline = (re->flags & PCRE_STARTLINE) != 0;
5268 firstline = (re->options & PCRE_FIRSTLINE) != 0;
5269
5270 /* The code starts after the real_pcre block and the capture name table. */
5271
5272 md->start_code = (const uschar *)external_re + re->name_table_offset +
5273   re->name_count * re->name_entry_size;
5274
5275 md->start_subject = (USPTR)subject;
5276 md->start_offset = start_offset;
5277 md->end_subject = md->start_subject + length;
5278 end_subject = md->end_subject;
5279
5280 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5281 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5282 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5283
5284 md->notbol = (options & PCRE_NOTBOL) != 0;
5285 md->noteol = (options & PCRE_NOTEOL) != 0;
5286 md->notempty = (options & PCRE_NOTEMPTY) != 0;
5287 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5288 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5289               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5290 md->hitend = FALSE;
5291
5292 md->recursive = NULL;                   /* No recursion at top level */
5293
5294 md->lcc = tables + lcc_offset;
5295 md->ctypes = tables + ctypes_offset;
5296
5297 /* Handle different \R options. */
5298
5299 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5300   {
5301   case 0:
5302   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5303     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5304   else
5305 #ifdef BSR_ANYCRLF
5306   md->bsr_anycrlf = TRUE;
5307 #else
5308   md->bsr_anycrlf = FALSE;
5309 #endif
5310   break;
5311
5312   case PCRE_BSR_ANYCRLF:
5313   md->bsr_anycrlf = TRUE;
5314   break;
5315
5316   case PCRE_BSR_UNICODE:
5317   md->bsr_anycrlf = FALSE;
5318   break;
5319
5320   default: return PCRE_ERROR_BADNEWLINE;
5321   }
5322
5323 /* Handle different types of newline. The three bits give eight cases. If
5324 nothing is set at run time, whatever was used at compile time applies. */
5325
5326 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5327         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5328   {
5329   case 0: newline = NEWLINE; break;   /* Compile-time default */
5330   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5331   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5332   case PCRE_NEWLINE_CR+
5333        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5334   case PCRE_NEWLINE_ANY: newline = -1; break;
5335   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5336   default: return PCRE_ERROR_BADNEWLINE;
5337   }
5338
5339 if (newline == -2)
5340   {
5341   md->nltype = NLTYPE_ANYCRLF;
5342   }
5343 else if (newline < 0)
5344   {
5345   md->nltype = NLTYPE_ANY;
5346   }
5347 else
5348   {
5349   md->nltype = NLTYPE_FIXED;
5350   if (newline > 255)
5351     {
5352     md->nllen = 2;
5353     md->nl[0] = (newline >> 8) & 255;
5354     md->nl[1] = newline & 255;
5355     }
5356   else
5357     {
5358     md->nllen = 1;
5359     md->nl[0] = newline;
5360     }
5361   }
5362
5363 /* Partial matching was originally supported only for a restricted set of
5364 regexes; from release 8.00 there are no restrictions, but the bits are still
5365 defined (though never set). So there's no harm in leaving this code. */
5366
5367 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5368   return PCRE_ERROR_BADPARTIAL;
5369
5370 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
5371 back the character offset. */
5372
5373 #ifdef SUPPORT_UTF8
5374 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5375   {
5376   if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
5377     return PCRE_ERROR_BADUTF8;
5378   if (start_offset > 0 && start_offset < length)
5379     {
5380     int tb = ((USPTR)subject)[start_offset];
5381     if (tb > 127)
5382       {
5383       tb &= 0xc0;
5384       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
5385       }
5386     }
5387   }
5388 #endif
5389
5390 /* The ims options can vary during the matching as a result of the presence
5391 of (?ims) items in the pattern. They are kept in a local variable so that
5392 restoring at the exit of a group is easy. */
5393
5394 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
5395
5396 /* If the expression has got more back references than the offsets supplied can
5397 hold, we get a temporary chunk of working store to use during the matching.
5398 Otherwise, we can use the vector supplied, rounding down its size to a multiple
5399 of 3. */
5400
5401 ocount = offsetcount - (offsetcount % 3);
5402
5403 if (re->top_backref > 0 && re->top_backref >= ocount/3)
5404   {
5405   ocount = re->top_backref * 3 + 3;
5406   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5407   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5408   using_temporary_offsets = TRUE;
5409   DPRINTF(("Got memory to hold back references\n"));
5410   }
5411 else md->offset_vector = offsets;
5412
5413 md->offset_end = ocount;
5414 md->offset_max = (2*ocount)/3;
5415 md->offset_overflow = FALSE;
5416 md->capture_last = -1;
5417
5418 /* Compute the minimum number of offsets that we need to reset each time. Doing
5419 this makes a huge difference to execution time when there aren't many brackets
5420 in the pattern. */
5421
5422 resetcount = 2 + re->top_bracket * 2;
5423 if (resetcount > offsetcount) resetcount = ocount;
5424
5425 /* Reset the working variable associated with each extraction. These should
5426 never be used unless previously set, but they get saved and restored, and so we
5427 initialize them to avoid reading uninitialized locations. */
5428
5429 if (md->offset_vector != NULL)
5430   {
5431   register int *iptr = md->offset_vector + ocount;
5432   register int *iend = iptr - resetcount/2 + 1;
5433   while (--iptr >= iend) *iptr = -1;
5434   }
5435
5436 /* Set up the first character to match, if available. The first_byte value is
5437 never set for an anchored regular expression, but the anchoring may be forced
5438 at run time, so we have to test for anchoring. The first char may be unset for
5439 an unanchored pattern, of course. If there's no first char and the pattern was
5440 studied, there may be a bitmap of possible first characters. */
5441
5442 if (!anchored)
5443   {
5444   if ((re->flags & PCRE_FIRSTSET) != 0)
5445     {
5446     first_byte = re->first_byte & 255;
5447     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5448       first_byte = md->lcc[first_byte];
5449     }
5450   else
5451     if (!startline && study != NULL &&
5452       (study->flags & PCRE_STUDY_MAPPED) != 0)
5453         start_bits = study->start_bits;
5454   }
5455
5456 /* For anchored or unanchored matches, there may be a "last known required
5457 character" set. */
5458
5459 if ((re->flags & PCRE_REQCHSET) != 0)
5460   {
5461   req_byte = re->req_byte & 255;
5462   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5463   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
5464   }
5465
5466
5467 /* ==========================================================================*/
5468
5469 /* Loop for handling unanchored repeated matching attempts; for anchored
5470 regexes the loop runs just once. */
5471
5472 for(;;)
5473   {
5474   USPTR save_end_subject = end_subject;
5475   USPTR new_start_match;
5476
5477   /* Reset the maximum number of extractions we might see. */
5478
5479   if (md->offset_vector != NULL)
5480     {
5481     register int *iptr = md->offset_vector;
5482     register int *iend = iptr + resetcount;
5483     while (iptr < iend) *iptr++ = -1;
5484     }
5485
5486   /* If firstline is TRUE, the start of the match is constrained to the first
5487   line of a multiline string. That is, the match must be before or at the first
5488   newline. Implement this by temporarily adjusting end_subject so that we stop
5489   scanning at a newline. If the match fails at the newline, later code breaks
5490   this loop. */
5491
5492   if (firstline)
5493     {
5494     USPTR t = start_match;
5495 #ifdef SUPPORT_UTF8
5496     if (utf8)
5497       {
5498       while (t < md->end_subject && !IS_NEWLINE(t))
5499         {
5500         t++;
5501         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5502         }
5503       }
5504     else
5505 #endif
5506     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5507     end_subject = t;
5508     }
5509
5510   /* There are some optimizations that avoid running the match if a known
5511   starting point is not found, or if a known later character is not present.
5512   However, there is an option that disables these, for testing and for ensuring
5513   that all callouts do actually occur. */
5514
5515   if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5516     {
5517     /* Advance to a unique first byte if there is one. */
5518
5519     if (first_byte >= 0)
5520       {
5521       if (first_byte_caseless)
5522         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5523           start_match++;
5524       else
5525         while (start_match < end_subject && *start_match != first_byte)
5526           start_match++;
5527       }
5528
5529     /* Or to just after a linebreak for a multiline match */
5530
5531     else if (startline)
5532       {
5533       if (start_match > md->start_subject + start_offset)
5534         {
5535 #ifdef SUPPORT_UTF8
5536         if (utf8)
5537           {
5538           while (start_match < end_subject && !WAS_NEWLINE(start_match))
5539             {
5540             start_match++;
5541             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5542               start_match++;
5543             }
5544           }
5545         else
5546 #endif
5547         while (start_match < end_subject && !WAS_NEWLINE(start_match))
5548           start_match++;
5549
5550         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5551         and we are now at a LF, advance the match position by one more character.
5552         */
5553
5554         if (start_match[-1] == CHAR_CR &&
5555              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5556              start_match < end_subject &&
5557              *start_match == CHAR_NL)
5558           start_match++;
5559         }
5560       }
5561
5562     /* Or to a non-unique first byte after study */
5563
5564     else if (start_bits != NULL)
5565       {
5566       while (start_match < end_subject)
5567         {
5568         register unsigned int c = *start_match;
5569         if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
5570           else break;
5571         }
5572       }
5573     }   /* Starting optimizations */
5574
5575   /* Restore fudged end_subject */
5576
5577   end_subject = save_end_subject;
5578
5579   /* The following two optimizations are disabled for partial matching or if
5580   disabling is explicitly requested. */
5581
5582   if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
5583     {
5584     /* If the pattern was studied, a minimum subject length may be set. This is
5585     only a lower bound; there may be no string of exactly that length that
5586     matches the pattern. Although the value is, strictly, in characters, we
5587     treat it as bytes to avoid spending too much time in this optimization. */
5588
5589     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
5590         (pcre_uint32)(end_subject - start_match) < study->minlength)
5591       {
5592       rc = MATCH_NOMATCH;
5593       break;
5594       }
5595
5596     /* If req_byte is set, we know that that character must appear in the
5597     subject for the match to succeed. If the first character is set, req_byte
5598     must be later in the subject; otherwise the test starts at the match point.
5599     This optimization can save a huge amount of backtracking in patterns with
5600     nested unlimited repeats that aren't going to match. Writing separate code
5601     for cased/caseless versions makes it go faster, as does using an
5602     autoincrement and backing off on a match.
5603
5604     HOWEVER: when the subject string is very, very long, searching to its end
5605     can take a long time, and give bad performance on quite ordinary patterns.
5606     This showed up when somebody was matching something like /^\d+C/ on a
5607     32-megabyte string... so we don't do this when the string is sufficiently
5608     long. */
5609
5610     if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
5611       {
5612       register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
5613
5614       /* We don't need to repeat the search if we haven't yet reached the
5615       place we found it at last time. */
5616
5617       if (p > req_byte_ptr)
5618         {
5619         if (req_byte_caseless)
5620           {
5621           while (p < end_subject)
5622             {
5623             register int pp = *p++;
5624             if (pp == req_byte || pp == req_byte2) { p--; break; }
5625             }
5626           }
5627         else
5628           {
5629           while (p < end_subject)
5630             {
5631             if (*p++ == req_byte) { p--; break; }
5632             }
5633           }
5634
5635         /* If we can't find the required character, break the matching loop,
5636         forcing a match failure. */
5637
5638         if (p >= end_subject)
5639           {
5640           rc = MATCH_NOMATCH;
5641           break;
5642           }
5643
5644         /* If we have found the required character, save the point where we
5645         found it, so that we don't search again next time round the loop if
5646         the start hasn't passed this character yet. */
5647
5648         req_byte_ptr = p;
5649         }
5650       }
5651     }
5652
5653 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
5654   printf(">>>> Match against: ");
5655   pchars(start_match, end_subject - start_match, TRUE, md);
5656   printf("\n");
5657 #endif
5658
5659   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
5660   first starting point for which a partial match was found. */
5661
5662   md->start_match_ptr = start_match;
5663   md->start_used_ptr = start_match;
5664   md->match_call_count = 0;
5665   rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
5666     0, 0);
5667   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
5668
5669   switch(rc)
5670     {
5671     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
5672     exactly like PRUNE. */
5673
5674     case MATCH_NOMATCH:
5675     case MATCH_PRUNE:
5676     case MATCH_THEN:
5677     new_start_match = start_match + 1;
5678 #ifdef SUPPORT_UTF8
5679     if (utf8)
5680       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
5681         new_start_match++;
5682 #endif
5683     break;
5684
5685     /* SKIP passes back the next starting point explicitly. */
5686
5687     case MATCH_SKIP:
5688     new_start_match = md->start_match_ptr;
5689     break;
5690
5691     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
5692
5693     case MATCH_COMMIT:
5694     rc = MATCH_NOMATCH;
5695     goto ENDLOOP;
5696
5697     /* Any other return is either a match, or some kind of error. */
5698
5699     default:
5700     goto ENDLOOP;
5701     }
5702
5703   /* Control reaches here for the various types of "no match at this point"
5704   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
5705
5706   rc = MATCH_NOMATCH;
5707
5708   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
5709   newline in the subject (though it may continue over the newline). Therefore,
5710   if we have just failed to match, starting at a newline, do not continue. */
5711
5712   if (firstline && IS_NEWLINE(start_match)) break;
5713
5714   /* Advance to new matching position */
5715
5716   start_match = new_start_match;
5717
5718   /* Break the loop if the pattern is anchored or if we have passed the end of
5719   the subject. */
5720
5721   if (anchored || start_match > end_subject) break;
5722
5723   /* If we have just passed a CR and we are now at a LF, and the pattern does
5724   not contain any explicit matches for \r or \n, and the newline option is CRLF
5725   or ANY or ANYCRLF, advance the match position by one more character. */
5726
5727   if (start_match[-1] == CHAR_CR &&
5728       start_match < end_subject &&
5729       *start_match == CHAR_NL &&
5730       (re->flags & PCRE_HASCRORLF) == 0 &&
5731         (md->nltype == NLTYPE_ANY ||
5732          md->nltype == NLTYPE_ANYCRLF ||
5733          md->nllen == 2))
5734     start_match++;
5735
5736   }   /* End of for(;;) "bumpalong" loop */
5737
5738 /* ==========================================================================*/
5739
5740 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
5741 conditions is true:
5742
5743 (1) The pattern is anchored or the match was failed by (*COMMIT);
5744
5745 (2) We are past the end of the subject;
5746
5747 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
5748     this option requests that a match occur at or before the first newline in
5749     the subject.
5750
5751 When we have a match and the offset vector is big enough to deal with any
5752 backreferences, captured substring offsets will already be set up. In the case
5753 where we had to get some local store to hold offsets for backreference
5754 processing, copy those that we can. In this case there need not be overflow if
5755 certain parts of the pattern were not used, even though there are more
5756 capturing parentheses than vector slots. */
5757
5758 ENDLOOP:
5759
5760 if (rc == MATCH_MATCH)
5761   {
5762   if (using_temporary_offsets)
5763     {
5764     if (offsetcount >= 4)
5765       {
5766       memcpy(offsets + 2, md->offset_vector + 2,
5767         (offsetcount - 2) * sizeof(int));
5768       DPRINTF(("Copied offsets from temporary memory\n"));
5769       }
5770     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
5771     DPRINTF(("Freeing temporary memory\n"));
5772     (pcre_free)(md->offset_vector);
5773     }
5774
5775   /* Set the return code to the number of captured strings, or 0 if there are
5776   too many to fit into the vector. */
5777
5778   rc = md->offset_overflow? 0 : md->end_offset_top/2;
5779
5780   /* If there is space, set up the whole thing as substring 0. The value of
5781   md->start_match_ptr might be modified if \K was encountered on the successful
5782   matching path. */
5783
5784   if (offsetcount < 2) rc = 0; else
5785     {
5786     offsets[0] = md->start_match_ptr - md->start_subject;
5787     offsets[1] = md->end_match_ptr - md->start_subject;
5788     }
5789
5790   DPRINTF((">>>> returning %d\n", rc));
5791   return rc;
5792   }
5793
5794 /* Control gets here if there has been an error, or if the overall match
5795 attempt has failed at all permitted starting positions. */
5796
5797 if (using_temporary_offsets)
5798   {
5799   DPRINTF(("Freeing temporary memory\n"));
5800   (pcre_free)(md->offset_vector);
5801   }
5802
5803 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
5804   {
5805   DPRINTF((">>>> error: returning %d\n", rc));
5806   return rc;
5807   }
5808 else if (start_partial != NULL)
5809   {
5810   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
5811   if (offsetcount > 1)
5812     {
5813     offsets[0] = start_partial - (USPTR)subject;
5814     offsets[1] = end_subject - (USPTR)subject;
5815     }
5816   return PCRE_ERROR_PARTIAL;
5817   }
5818 else
5819   {
5820   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
5821   return PCRE_ERROR_NOMATCH;
5822   }
5823 }
5824
5825 /* End of pcre_exec.c */