GLib: implement GMutex natively on Linux

[platform/upstream/glib.git] / glib / pcre / pcre_dfa_exec.c
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c

index c241f5b..f8c887f 100644 (file)
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language (but see
  below for why this module is different).
  
                         Written by Philip Hazel
-           Copyright (c) 1997-2010 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
  
  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
@@ -38,10 +38,9 @@ POSSIBILITY OF SUCH DAMAGE.
  -----------------------------------------------------------------------------
  */
  
-
  /* This module contains the external function pcre_dfa_exec(), which is an
  alternative matching function that uses a sort of DFA algorithm (not a true
-FSM). This is NOT Perl- compatible, but it has advantages in certain
+FSM). This is NOT Perl-compatible, but it has advantages in certain
  applications. */
  
  
@@ -106,14 +105,14 @@ never stored, so we push them well clear of the normal opcodes. */
  
  
  /* This table identifies those opcodes that are followed immediately by a
-character that is to be tested in some way. This makes is possible to
+character that is to be tested in some way. This makes it possible to
  centralize the loading of these characters. In the case of Type * etc, the
  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
  small value. Non-zero values in the table are the offsets from the opcode where
  the character is to be found. ***NOTE*** If the start of this table is
  modified, the three tables that follow must also be modified. */
  
-static const uschar coptable[] = {
+static const pcre_uint8 coptable[] = {
    0,                             /* End                                    */
    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
@@ -121,22 +120,34 @@ static const uschar coptable[] = {
    0, 0,                          /* \P, \p                                 */
    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
    0,                             /* \X                                     */
-  0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
+  0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
    1,                             /* Char                                   */
-  1,                             /* Charnc                                 */
+  1,                             /* Chari                                  */
    1,                             /* not                                    */
+  1,                             /* noti                                   */
    /* Positive single-char repeats                                          */
    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
-  3, 3, 3,                       /* upto, minupto, exact                   */
-  1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
+  1+IMM2_SIZE,                   /* exact                                  */
+  1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
+  1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
+  1+IMM2_SIZE,                   /* exact I                                */
+  1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
    /* Negative single-char repeats - only for chars < 256                   */
    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
-  3, 3, 3,                       /* NOT upto, minupto, exact               */
-  1, 1, 1, 3,                    /* NOT *+, ++, ?+, updo+                  */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
+  1+IMM2_SIZE,                   /* NOT exact                              */
+  1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
+  1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
+  1+IMM2_SIZE,                   /* NOT exact I                            */
+  1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
    /* Positive type repeats                                                 */
    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
-  3, 3, 3,                       /* Type upto, minupto, exact              */
-  1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
+  1+IMM2_SIZE,                   /* Type exact                             */
+  1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
    /* Character class & ref repeats                                         */
    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
    0, 0,                          /* CRRANGE, CRMINRANGE                    */
@@ -144,25 +155,30 @@ static const uschar coptable[] = {
    0,                             /* NCLASS                                 */
    0,                             /* XCLASS - variable length               */
    0,                             /* REF                                    */
+  0,                             /* REFI                                   */
    0,                             /* RECURSE                                */
    0,                             /* CALLOUT                                */
    0,                             /* Alt                                    */
    0,                             /* Ket                                    */
    0,                             /* KetRmax                                */
    0,                             /* KetRmin                                */
+  0,                             /* KetRpos                                */
+  0,                             /* Reverse                                */
    0,                             /* Assert                                 */
    0,                             /* Assert not                             */
    0,                             /* Assert behind                          */
    0,                             /* Assert behind not                      */
-  0,                             /* Reverse                                */
-  0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
-  0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
+  0, 0,                          /* ONCE, ONCE_NC                          */
+  0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
+  0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
    0, 0,                          /* CREF, NCREF                            */
    0, 0,                          /* RREF, NRREF                            */
    0,                             /* DEF                                    */
-  0, 0,                          /* BRAZERO, BRAMINZERO                    */
-  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
-  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
+  0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
+  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
+  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
+  0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
+  0, 0                           /* CLOSE, SKIPZERO  */
  };
  
  /* This table identifies those opcodes that inspect a character. It is used to
@@ -170,7 +186,7 @@ remember the fact that a character could have been inspected when the end of
  the subject is reached. ***NOTE*** If the start of this table is modified, the
  two tables that follow must also be modified. */
  
-static const uschar poptable[] = {
+static const pcre_uint8 poptable[] = {
    0,                             /* End                                    */
    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
@@ -178,18 +194,25 @@ static const uschar poptable[] = {
    1, 1,                          /* \P, \p                                 */
    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
    1,                             /* \X                                     */
-  0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
+  0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
    1,                             /* Char                                   */
-  1,                             /* Charnc                                 */
+  1,                             /* Chari                                  */
    1,                             /* not                                    */
+  1,                             /* noti                                   */
    /* Positive single-char repeats                                          */
    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
    1, 1, 1,                       /* upto, minupto, exact                   */
    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
+  1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
+  1, 1, 1,                       /* upto I, minupto I, exact I             */
+  1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
    /* Negative single-char repeats - only for chars < 256                   */
    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
    1, 1, 1,                       /* NOT upto, minupto, exact               */
    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
+  1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
+  1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
+  1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
    /* Positive type repeats                                                 */
    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
    1, 1, 1,                       /* Type upto, minupto, exact              */
@@ -201,31 +224,36 @@ static const uschar poptable[] = {
    1,                             /* NCLASS                                 */
    1,                             /* XCLASS - variable length               */
    0,                             /* REF                                    */
+  0,                             /* REFI                                   */
    0,                             /* RECURSE                                */
    0,                             /* CALLOUT                                */
    0,                             /* Alt                                    */
    0,                             /* Ket                                    */
    0,                             /* KetRmax                                */
    0,                             /* KetRmin                                */
+  0,                             /* KetRpos                                */
+  0,                             /* Reverse                                */
    0,                             /* Assert                                 */
    0,                             /* Assert not                             */
    0,                             /* Assert behind                          */
    0,                             /* Assert behind not                      */
-  0,                             /* Reverse                                */
-  0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
-  0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
+  0, 0,                          /* ONCE, ONCE_NC                          */
+  0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
+  0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
    0, 0,                          /* CREF, NCREF                            */
    0, 0,                          /* RREF, NRREF                            */
    0,                             /* DEF                                    */
-  0, 0,                          /* BRAZERO, BRAMINZERO                    */
-  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
-  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
+  0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
+  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
+  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
+  0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
+  0, 0                           /* CLOSE, SKIPZERO                        */
  };
  
  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
  and \w */
  
-static const uschar toptable1[] = {
+static const pcre_uint8 toptable1[] = {
    0, 0, 0, 0, 0, 0,
    ctype_digit, ctype_digit,
    ctype_space, ctype_space,
@@ -233,7 +261,7 @@ static const uschar toptable1[] = {
    0, 0                            /* OP_ANY, OP_ALLANY */
  };
  
-static const uschar toptable2[] = {
+static const pcre_uint8 toptable2[] = {
    0, 0, 0, 0, 0, 0,
    ctype_digit, 0,
    ctype_space, 0,
@@ -250,11 +278,10 @@ these structures in, is a vector of ints. */
  typedef struct stateblock {
    int offset;                     /* Offset to opcode */
    int count;                      /* Count for repeats */
-  int ims;                        /* ims flag bits */
    int data;                       /* Some use extra data */
  } stateblock;
  
-#define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))
+#define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
  
  
  #ifdef PCRE_DEBUG
@@ -273,7 +300,7 @@ Returns:       nothing
  */
  
  static void
-pchars(unsigned char *p, int length, FILE *f)
+pchars(const pcre_uchar *p, int length, FILE *f)
  {
  int c;
  while (length-- > 0)
@@ -306,9 +333,7 @@ Arguments:
    offsetcount       size of same
    workspace         vector of workspace
    wscount           size of same
-  ims               the current ims flags
    rlevel            function call recursion level
-  recursing         regex recursive call level
  
  Returns:            > 0 => number of match offset pairs placed in offsets
                      = 0 => offsets overflowed; longest matches are present
@@ -323,7 +348,6 @@ for the current character, one for the following character). */
      { \
      next_active_state->offset = (x); \
      next_active_state->count  = (y); \
-    next_active_state->ims    = ims; \
      next_active_state++; \
      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
      } \
@@ -334,7 +358,6 @@ for the current character, one for the following character). */
      { \
      next_active_state->offset = (x); \
      next_active_state->count  = (y); \
-    next_active_state->ims    = ims; \
      next_active_state->data   = (z); \
      next_active_state++; \
      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
@@ -346,7 +369,6 @@ for the current character, one for the following character). */
      { \
      next_new_state->offset = (x); \
      next_new_state->count  = (y); \
-    next_new_state->ims    = ims; \
      next_new_state++; \
      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
      } \
@@ -357,10 +379,10 @@ for the current character, one for the following character). */
      { \
      next_new_state->offset = (x); \
      next_new_state->count  = (y); \
-    next_new_state->ims    = ims; \
      next_new_state->data   = (z); \
      next_new_state++; \
-    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
+    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
+      (x), (y), (z), __LINE__)); \
      } \
    else return PCRE_ERROR_DFA_WSSIZE
  
@@ -369,39 +391,41 @@ for the current character, one for the following character). */
  static int
  internal_dfa_exec(
    dfa_match_data *md,
-  const uschar *this_start_code,
-  const uschar *current_subject,
+  const pcre_uchar *this_start_code,
+  const pcre_uchar *current_subject,
    int start_offset,
    int *offsets,
    int offsetcount,
    int *workspace,
    int wscount,
-  int ims,
-  int  rlevel,
-  int  recursing)
+  int  rlevel)
  {
  stateblock *active_states, *new_states, *temp_states;
  stateblock *next_active_state, *next_new_state;
  
-const uschar *ctypes, *lcc, *fcc;
-const uschar *ptr;
-const uschar *end_code, *first_op;
+const pcre_uint8 *ctypes, *lcc, *fcc;
+const pcre_uchar *ptr;
+const pcre_uchar *end_code, *first_op;
+
+dfa_recursion_info new_recursive;
  
  int active_count, new_count, match_count;
  
  /* Some fields in the md block are frequently referenced, so we load them into
  independent variables in the hope that this will perform better. */
  
-const uschar *start_subject = md->start_subject;
-const uschar *end_subject = md->end_subject;
-const uschar *start_code = md->start_code;
+const pcre_uchar *start_subject = md->start_subject;
+const pcre_uchar *end_subject = md->end_subject;
+const pcre_uchar *start_code = md->start_code;
  
-#ifdef SUPPORT_UTF8
-BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
+#ifdef SUPPORT_UTF
+BOOL utf = (md->poptions & PCRE_UTF8) != 0;
  #else
-BOOL utf8 = FALSE;
+BOOL utf = FALSE;
  #endif
  
+BOOL reset_could_continue = FALSE;
+
  rlevel++;
  offsetcount &= (-2);
  
@@ -410,8 +434,8 @@ wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
            (2 * INTS_PER_STATEBLOCK);
  
  DPRINTF(("\n%.*s---------------------\n"
-  "%.*sCall to internal_dfa_exec f=%d r=%d\n",
-  rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));
+  "%.*sCall to internal_dfa_exec f=%d\n",
+  rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
  
  ctypes = md->tables + ctypes_offset;
  lcc = md->tables + lcc_offset;
@@ -424,7 +448,9 @@ next_new_state = new_states = active_states + wscount;
  new_count = 0;
  
  first_op = this_start_code + 1 + LINK_SIZE +
-  ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
+  ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
+    *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
+    ? IMM2_SIZE:0);
  
  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
  the alternative states onto the list, and find out where the end is. This
@@ -452,18 +478,16 @@ if (*first_op == OP_REVERSE)
    /* If we can't go back the amount required for the longest lookbehind
    pattern, go back as far as we can; some alternatives may still be viable. */
  
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
    /* In character mode we have to step back character by character */
  
-  if (utf8)
+  if (utf)
      {
      for (gone_back = 0; gone_back < max_back; gone_back++)
        {
        if (current_subject <= start_subject) break;
        current_subject--;
-      while (current_subject > start_subject &&
-             (*current_subject & 0xc0) == 0x80)
-        current_subject--;
+      ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
        }
      }
    else
@@ -473,7 +497,7 @@ if (*first_op == OP_REVERSE)
  
      {
      gone_back = (current_subject - max_back < start_subject)?
-      current_subject - start_subject : max_back;
+      (int)(current_subject - start_subject) : max_back;
      current_subject -= gone_back;
      }
  
@@ -490,7 +514,7 @@ if (*first_op == OP_REVERSE)
      int back = GET(end_code, 2+LINK_SIZE);
      if (back <= gone_back)
        {
-      int bstate = end_code - start_code + 2 + 2*LINK_SIZE;
+      int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
        ADD_NEW_DATA(-bstate, 0, gone_back - back);
        }
      end_code += GET(end_code, 1);
@@ -523,10 +547,12 @@ else
    else
      {
      int length = 1 + LINK_SIZE +
-      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
+      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
+        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
+        ? IMM2_SIZE:0);
      do
        {
-      ADD_NEW(end_code - start_code + length, 0);
+      ADD_NEW((int)(end_code - start_code + length), 0);
        end_code += GET(end_code, 1);
        length = 1 + LINK_SIZE;
        }
@@ -536,7 +562,7 @@ else
  
  workspace[0] = 0;    /* Bit indicating which vector is current */
  
-DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));
+DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
  
  /* Loop for scanning the subject */
  
@@ -547,7 +573,9 @@ for (;;)
    int clen, dlen;
    unsigned int c, d;
    int forced_fail = 0;
-  BOOL could_continue = FALSE;
+  BOOL partial_newline = FALSE;
+  BOOL could_continue = reset_could_continue;
+  reset_could_continue = FALSE;
  
    /* Make the new state list into the active state list and empty the
    new state list. */
@@ -563,7 +591,7 @@ for (;;)
  
  #ifdef PCRE_DEBUG
    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
-  pchars((uschar *)ptr, strlen((char *)ptr), stdout);
+  pchars(ptr, STRLEN_UC(ptr), stdout);
    printf("\"\n");
  
    printf("%.*sActive states: ", rlevel*2-2, SP);
@@ -583,10 +611,10 @@ for (;;)
  
    if (ptr < end_subject)
      {
-    clen = 1;        /* Number of bytes in the character */
-#ifdef SUPPORT_UTF8
-    if (utf8) { GETCHARLEN(c, ptr, clen); } else
-#endif  /* SUPPORT_UTF8 */
+    clen = 1;        /* Number of data items in the character */
+#ifdef SUPPORT_UTF
+    if (utf) { GETCHARLEN(c, ptr, clen); } else
+#endif  /* SUPPORT_UTF */
      c = *ptr;
      }
    else
@@ -603,7 +631,8 @@ for (;;)
    for (i = 0; i < active_count; i++)
      {
      stateblock *current_state = active_states + i;
-    const uschar *code;
+    BOOL caseless = FALSE;
+    const pcre_uchar *code;
      int state_offset = current_state->offset;
      int count, codevalue, rrc;
  
@@ -614,13 +643,10 @@ for (;;)
          else printf("0x%02x\n", c);
  #endif
  
-    /* This variable is referred to implicity in the ADD_xxx macros. */
-
-    ims = current_state->ims;
-
      /* A negative offset is a special case meaning "hold off going to this
      (negated) state until the number of characters in the data field have
-    been skipped". */
+    been skipped". If the could_continue flag was passed over from a previous
+    state, arrange for it to passed on. */
  
      if (state_offset < 0)
        {
@@ -629,6 +655,7 @@ for (;;)
          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
          ADD_NEW_DATA(state_offset, current_state->count,
            current_state->data - 1);
+        if (could_continue) reset_could_continue = TRUE;
          continue;
          }
        else
@@ -668,17 +695,17 @@ for (;;)
      permitted.
  
      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
-    argument that is not a data character - but is always one byte long. We
-    have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
-    this case. To keep the other cases fast, convert these ones to new opcodes.
-    */
+    argument that is not a data character - but is always one byte long because
+    the values are small. We have to take special action to deal with  \P, \p,
+    \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
+    these ones to new opcodes. */
  
      if (coptable[codevalue] > 0)
        {
        dlen = 1;
-#ifdef SUPPORT_UTF8
-      if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
-#endif  /* SUPPORT_UTF8 */
+#ifdef SUPPORT_UTF
+      if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
+#endif  /* SUPPORT_UTF */
        d = code[coptable[codevalue]];
        if (codevalue >= OP_TYPESTAR)
          {
@@ -723,7 +750,12 @@ for (;;)
  
  /* ========================================================================== */
        /* Reached a closing bracket. If not at the end of the pattern, carry
-      on with the next opcode. Otherwise, unless we have an empty string and
+      on with the next opcode. For repeating opcodes, also add the repeat
+      state. Note that KETRPOS will always be encountered at the end of the
+      subpattern, because the possessive subpattern repeats are always handled
+      using recursive calls. Thus, it never adds any new states.
+
+      At the end of the (sub)pattern, unless we have an empty string and
        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
        start of the subject, save the match data, shifting up all previous
        matches so we always have the longest first. */
@@ -731,6 +763,7 @@ for (;;)
        case OP_KET:
        case OP_KETRMIN:
        case OP_KETRMAX:
+      case OP_KETRPOS:
        if (code != end_code)
          {
          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
@@ -747,16 +780,16 @@ for (;;)
                  current_subject > start_subject + md->start_offset)))
            {
            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
-            else if (match_count > 0 && ++match_count * 2 >= offsetcount)
+            else if (match_count > 0 && ++match_count * 2 > offsetcount)
                match_count = 0;
            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
            if (offsetcount >= 2)
              {
-            offsets[0] = current_subject - start_subject;
-            offsets[1] = ptr - start_subject;
+            offsets[0] = (int)(current_subject - start_subject);
+            offsets[1] = (int)(ptr - start_subject);
              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
-              offsets[1] - offsets[0], current_subject));
+              offsets[1] - offsets[0], (char *)current_subject));
              }
            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
              {
@@ -776,7 +809,7 @@ for (;;)
        /*-----------------------------------------------------------------*/
        case OP_ALT:
        do { code += GET(code, 1); } while (*code == OP_ALT);
-      ADD_ACTIVE(code - start_code, 0);
+      ADD_ACTIVE((int)(code - start_code), 0);
        break;
  
        /*-----------------------------------------------------------------*/
@@ -784,7 +817,7 @@ for (;;)
        case OP_SBRA:
        do
          {
-        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
+        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
          code += GET(code, 1);
          }
        while (*code == OP_ALT);
@@ -793,11 +826,11 @@ for (;;)
        /*-----------------------------------------------------------------*/
        case OP_CBRA:
        case OP_SCBRA:
-      ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
+      ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
        code += GET(code, 1);
        while (*code == OP_ALT)
          {
-        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
+        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
          code += GET(code, 1);
          }
        break;
@@ -808,34 +841,37 @@ for (;;)
        ADD_ACTIVE(state_offset + 1, 0);
        code += 1 + GET(code, 2);
        while (*code == OP_ALT) code += GET(code, 1);
-      ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
+      ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
        break;
  
        /*-----------------------------------------------------------------*/
        case OP_SKIPZERO:
        code += 1 + GET(code, 2);
        while (*code == OP_ALT) code += GET(code, 1);
-      ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
+      ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
        break;
  
        /*-----------------------------------------------------------------*/
        case OP_CIRC:
-      if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
-          ((ims & PCRE_MULTILINE) != 0 &&
-            ptr != end_subject &&
-            WAS_NEWLINE(ptr)))
+      if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
          { ADD_ACTIVE(state_offset + 1, 0); }
        break;
  
        /*-----------------------------------------------------------------*/
-      case OP_EOD:
-      if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }
+      case OP_CIRCM:
+      if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
+          (ptr != end_subject && WAS_NEWLINE(ptr)))
+        { ADD_ACTIVE(state_offset + 1, 0); }
        break;
  
        /*-----------------------------------------------------------------*/
-      case OP_OPT:
-      ims = code[1];
-      ADD_ACTIVE(state_offset + 2, 0);
+      case OP_EOD:
+      if (ptr >= end_subject)
+        {
+        if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+          could_continue = TRUE;
+        else { ADD_ACTIVE(state_offset + 1, 0); }
+        }
        break;
  
        /*-----------------------------------------------------------------*/
@@ -858,7 +894,20 @@ for (;;)
        /*-----------------------------------------------------------------*/
        case OP_ANY:
        if (clen > 0 && !IS_NEWLINE(ptr))
-        { ADD_NEW(state_offset + 1, 0); }
+        {
+        if (ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else
+          {
+          ADD_NEW(state_offset + 1, 0);
+          }
+        }
        break;
  
        /*-----------------------------------------------------------------*/
@@ -869,7 +918,9 @@ for (;;)
  
        /*-----------------------------------------------------------------*/
        case OP_EODN:
-      if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
+      if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+        could_continue = TRUE;
+      else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
          { ADD_ACTIVE(state_offset + 1, 0); }
        break;
  
@@ -877,13 +928,53 @@ for (;;)
        case OP_DOLL:
        if ((md->moptions & PCRE_NOTEOL) == 0)
          {
-        if (clen == 0 ||
+        if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+          could_continue = TRUE;
+        else if (clen == 0 ||
              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
-               ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
+               (ptr == end_subject - md->nllen)
              ))
            { ADD_ACTIVE(state_offset + 1, 0); }
+        else if (ptr + 1 >= md->end_subject &&
+                 (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
+                 NLBLOCK->nltype == NLTYPE_FIXED &&
+                 NLBLOCK->nllen == 2 &&
+                 c == NLBLOCK->nl[0])
+          {
+          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+            {
+            reset_could_continue = TRUE;
+            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
+            }
+          else could_continue = partial_newline = TRUE;
+          }
          }
-      else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
+      break;
+
+      /*-----------------------------------------------------------------*/
+      case OP_DOLLM:
+      if ((md->moptions & PCRE_NOTEOL) == 0)
+        {
+        if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+          could_continue = TRUE;
+        else if (clen == 0 ||
+            ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
+          { ADD_ACTIVE(state_offset + 1, 0); }
+        else if (ptr + 1 >= md->end_subject &&
+                 (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
+                 NLBLOCK->nltype == NLTYPE_FIXED &&
+                 NLBLOCK->nllen == 2 &&
+                 c == NLBLOCK->nl[0])
+          {
+          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+            {
+            reset_could_continue = TRUE;
+            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
+            }
+          else could_continue = partial_newline = TRUE;
+          }
+        }
+      else if (IS_NEWLINE(ptr))
          { ADD_ACTIVE(state_offset + 1, 0); }
        break;
  
@@ -914,19 +1005,43 @@ for (;;)
  
          if (ptr > start_subject)
            {
-          const uschar *temp = ptr - 1;
+          const pcre_uchar *temp = ptr - 1;
            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
-#ifdef SUPPORT_UTF8
-          if (utf8) BACKCHAR(temp);
+#ifdef SUPPORT_UTF
+          if (utf) { BACKCHAR(temp); }
  #endif
            GETCHARTEST(d, temp);
+#ifdef SUPPORT_UCP
+          if ((md->poptions & PCRE_UCP) != 0)
+            {
+            if (d == '_') left_word = TRUE; else
+              {
+              int cat = UCD_CATEGORY(d);
+              left_word = (cat == ucp_L || cat == ucp_N);
+              }
+            }
+          else
+#endif
            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
            }
-        else left_word = 0;
+        else left_word = FALSE;
  
          if (clen > 0)
+          {
+#ifdef SUPPORT_UCP
+          if ((md->poptions & PCRE_UCP) != 0)
+            {
+            if (c == '_') right_word = TRUE; else
+              {
+              int cat = UCD_CATEGORY(c);
+              right_word = (cat == ucp_L || cat == ucp_N);
+              }
+            }
+          else
+#endif
            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
-        else right_word = 0;
+          }
+        else right_word = FALSE;
  
          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
            { ADD_ACTIVE(state_offset + 1, 0); }
@@ -945,7 +1060,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
-        int chartype = UCD_CHARTYPE(c);
+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
          switch(code[1])
            {
            case PT_ANY:
@@ -953,11 +1068,12 @@ for (;;)
            break;
  
            case PT_LAMP:
-          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+               chartype == ucp_Lt;
            break;
  
            case PT_GC:
-          OK = _pcre_ucp_gentype[chartype] == code[2];
+          OK = PRIV(ucp_gentype)[chartype] == code[2];
            break;
  
            case PT_PC:
@@ -968,6 +1084,30 @@ for (;;)
            OK = UCD_SCRIPT(c) == code[2];
            break;
  
+          /* These are specials for combination cases. */
+
+          case PT_ALNUM:
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
+          break;
+
+          case PT_SPACE:    /* Perl space */
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+          break;
+
+          case PT_PXSPACE:  /* POSIX space */
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+               c == CHAR_FF || c == CHAR_CR;
+          break;
+
+          case PT_WORD:
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
+               c == CHAR_UNDERSCORE;
+          break;
+
            /* Should never occur, but keep compilers from grumbling. */
  
            default:
@@ -995,7 +1135,15 @@ for (;;)
        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
        if (clen > 0)
          {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
              (c < 256 &&
                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@@ -1018,7 +1166,15 @@ for (;;)
        ADD_ACTIVE(state_offset + 2, 0);
        if (clen > 0)
          {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
              (c < 256 &&
                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@@ -1040,7 +1196,15 @@ for (;;)
        ADD_ACTIVE(state_offset + 2, 0);
        if (clen > 0)
          {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
              (c < 256 &&
                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@@ -1060,13 +1224,21 @@ for (;;)
        count = current_state->count;  /* Number already matched */
        if (clen > 0)
          {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
              (c < 256 &&
                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
            {
            if (++count >= GET2(code, 1))
-            { ADD_NEW(state_offset + 4, 0); }
+            { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
            else
              { ADD_NEW(state_offset, count); }
            }
@@ -1077,11 +1249,19 @@ for (;;)
        case OP_TYPEUPTO:
        case OP_TYPEMINUPTO:
        case OP_TYPEPOSUPTO:
-      ADD_ACTIVE(state_offset + 4, 0);
+      ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
        count = current_state->count;  /* Number already matched */
        if (clen > 0)
          {
-        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+        if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+            (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+            NLBLOCK->nltype == NLTYPE_FIXED &&
+            NLBLOCK->nllen == 2 &&
+            c == NLBLOCK->nl[0])
+          {
+          could_continue = partial_newline = TRUE;
+          }
+        else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
              (c < 256 &&
                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
@@ -1092,7 +1272,7 @@ for (;;)
              next_active_state--;
              }
            if (++count >= GET2(code, 1))
-            { ADD_NEW(state_offset + 4, 0); }
+            { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
            else
              { ADD_NEW(state_offset, count); }
            }
@@ -1114,7 +1294,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
-        int chartype = UCD_CHARTYPE(c);
+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
          switch(code[2])
            {
            case PT_ANY:
@@ -1122,11 +1302,12 @@ for (;;)
            break;
  
            case PT_LAMP:
-          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+            chartype == ucp_Lt;
            break;
  
            case PT_GC:
-          OK = _pcre_ucp_gentype[chartype] == code[3];
+          OK = PRIV(ucp_gentype)[chartype] == code[3];
            break;
  
            case PT_PC:
@@ -1137,6 +1318,30 @@ for (;;)
            OK = UCD_SCRIPT(c) == code[3];
            break;
  
+          /* These are specials for combination cases. */
+
+          case PT_ALNUM:
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
+          break;
+
+          case PT_SPACE:    /* Perl space */
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+          break;
+
+          case PT_PXSPACE:  /* POSIX space */
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+               c == CHAR_FF || c == CHAR_CR;
+          break;
+
+          case PT_WORD:
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
+               c == CHAR_UNDERSCORE;
+          break;
+
            /* Should never occur, but keep compilers from grumbling. */
  
            default:
@@ -1165,7 +1370,7 @@ for (;;)
        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
          {
-        const uschar *nptr = ptr + clen;
+        const pcre_uchar *nptr = ptr + clen;
          int ncount = 0;
          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
            {
@@ -1336,7 +1541,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
-        int chartype = UCD_CHARTYPE(c);
+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
          switch(code[2])
            {
            case PT_ANY:
@@ -1344,11 +1549,12 @@ for (;;)
            break;
  
            case PT_LAMP:
-          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+            chartype == ucp_Lt;
            break;
  
            case PT_GC:
-          OK = _pcre_ucp_gentype[chartype] == code[3];
+          OK = PRIV(ucp_gentype)[chartype] == code[3];
            break;
  
            case PT_PC:
@@ -1359,6 +1565,30 @@ for (;;)
            OK = UCD_SCRIPT(c) == code[3];
            break;
  
+          /* These are specials for combination cases. */
+
+          case PT_ALNUM:
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
+          break;
+
+          case PT_SPACE:    /* Perl space */
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+          break;
+
+          case PT_PXSPACE:  /* POSIX space */
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+               c == CHAR_FF || c == CHAR_CR;
+          break;
+
+          case PT_WORD:
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
+               c == CHAR_UNDERSCORE;
+          break;
+
            /* Should never occur, but keep compilers from grumbling. */
  
            default:
@@ -1396,7 +1626,7 @@ for (;;)
        ADD_ACTIVE(state_offset + 2, 0);
        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
          {
-        const uschar *nptr = ptr + clen;
+        const pcre_uchar *nptr = ptr + clen;
          int ncount = 0;
          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
@@ -1578,32 +1808,57 @@ for (;;)
        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
-        { ADD_ACTIVE(state_offset + 6, 0); }
+        { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
        count = current_state->count;  /* Number already matched */
        if (clen > 0)
          {
          BOOL OK;
-        int chartype = UCD_CHARTYPE(c);
-        switch(code[4])
+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
+        switch(code[1 + IMM2_SIZE + 1])
            {
            case PT_ANY:
            OK = TRUE;
            break;
  
            case PT_LAMP:
-          OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+            chartype == ucp_Lt;
            break;
  
            case PT_GC:
-          OK = _pcre_ucp_gentype[chartype] == code[5];
+          OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
            break;
  
            case PT_PC:
-          OK = chartype == code[5];
+          OK = chartype == code[1 + IMM2_SIZE + 2];
            break;
  
            case PT_SC:
-          OK = UCD_SCRIPT(c) == code[5];
+          OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
+          break;
+
+          /* These are specials for combination cases. */
+
+          case PT_ALNUM:
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
+          break;
+
+          case PT_SPACE:    /* Perl space */
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+          break;
+
+          case PT_PXSPACE:  /* POSIX space */
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+               c == CHAR_FF || c == CHAR_CR;
+          break;
+
+          case PT_WORD:
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
+               c == CHAR_UNDERSCORE;
            break;
  
            /* Should never occur, but keep compilers from grumbling. */
@@ -1621,7 +1876,7 @@ for (;;)
              next_active_state--;
              }
            if (++count >= GET2(code, 1))
-            { ADD_NEW(state_offset + 6, 0); }
+            { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
            else
              { ADD_NEW(state_offset, count); }
            }
@@ -1634,11 +1889,11 @@ for (;;)
        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
-        { ADD_ACTIVE(state_offset + 4, 0); }
+        { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
        count = current_state->count;  /* Number already matched */
        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
          {
-        const uschar *nptr = ptr + clen;
+        const pcre_uchar *nptr = ptr + clen;
          int ncount = 0;
          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
            {
@@ -1654,8 +1909,10 @@ for (;;)
            ncount++;
            nptr += ndlen;
            }
+        if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+            reset_could_continue = TRUE;
          if (++count >= GET2(code, 1))
-          { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
+          { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
          else
            { ADD_NEW_DATA(-state_offset, count, ncount); }
          }
@@ -1668,7 +1925,7 @@ for (;;)
        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
-        { ADD_ACTIVE(state_offset + 4, 0); }
+        { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
        count = current_state->count;  /* Number already matched */
        if (clen > 0)
          {
@@ -1695,7 +1952,7 @@ for (;;)
              next_active_state--;
              }
            if (++count >= GET2(code, 1))
-            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
+            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
            else
              { ADD_NEW_DATA(-state_offset, count, ncount); }
            break;
@@ -1712,7 +1969,7 @@ for (;;)
        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
-        { ADD_ACTIVE(state_offset + 4, 0); }
+        { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
        count = current_state->count;  /* Number already matched */
        if (clen > 0)
          {
@@ -1741,7 +1998,7 @@ for (;;)
              next_active_state--;
              }
            if (++count >= GET2(code, 1))
-            { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
+            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
            else
              { ADD_NEW_DATA(-state_offset, count, 0); }
            }
@@ -1754,7 +2011,7 @@ for (;;)
        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
-        { ADD_ACTIVE(state_offset + 4, 0); }
+        { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
        count = current_state->count;  /* Number already matched */
        if (clen > 0)
          {
@@ -1796,7 +2053,7 @@ for (;;)
              next_active_state--;
              }
            if (++count >= GET2(code, 1))
-            { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
+            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
            else
              { ADD_NEW_DATA(-state_offset, count, 0); }
            }
@@ -1815,35 +2072,35 @@ for (;;)
        break;
  
        /*-----------------------------------------------------------------*/
-      case OP_CHARNC:
+      case OP_CHARI:
        if (clen == 0) break;
  
-#ifdef SUPPORT_UTF8
-      if (utf8)
+#ifdef SUPPORT_UTF
+      if (utf)
          {
          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
            {
            unsigned int othercase;
-          if (c < 128) othercase = fcc[c]; else
-
-          /* If we have Unicode property support, we can use it to test the
-          other case of the character. */
-
+          if (c < 128)
+            othercase = fcc[c];
+          else
+            /* If we have Unicode property support, we can use it to test the
+            other case of the character. */
  #ifdef SUPPORT_UCP
-          othercase = UCD_OTHERCASE(c);
+            othercase = UCD_OTHERCASE(c);
  #else
-          othercase = NOTACHAR;
+            othercase = NOTACHAR;
  #endif
  
            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
            }
          }
        else
-#endif  /* SUPPORT_UTF8 */
-
-      /* Non-UTF-8 mode */
+#endif  /* SUPPORT_UTF */
+      /* Not UTF mode */
          {
-        if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
+        if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
+          { ADD_NEW(state_offset + 2, 0); }
          }
        break;
  
@@ -1857,7 +2114,7 @@ for (;;)
        case OP_EXTUNI:
        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
          {
-        const uschar *nptr = ptr + clen;
+        const pcre_uchar *nptr = ptr + clen;
          int ncount = 0;
          while (nptr < end_subject)
            {
@@ -1867,6 +2124,8 @@ for (;;)
            ncount++;
            nptr += nclen;
            }
+        if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+            reset_could_continue = TRUE;
          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
          }
        break;
@@ -1892,7 +2151,13 @@ for (;;)
          break;
  
          case 0x000d:
-        if (ptr + 1 < end_subject && ptr[1] == 0x0a)
+        if (ptr + 1 >= end_subject)
+          {
+          ADD_NEW(state_offset + 1, 0);
+          if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+            reset_could_continue = TRUE;
+          }
+        else if (ptr[1] == 0x0a)
            {
            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
            }
@@ -2001,19 +2266,45 @@ for (;;)
        break;
  
        /*-----------------------------------------------------------------*/
-      /* Match a negated single character. This is only used for one-byte
-      characters, that is, we know that d < 256. The character we are
-      checking (c) can be multibyte. */
+      /* Match a negated single character casefully. */
  
        case OP_NOT:
+      if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
+      break;
+
+      /*-----------------------------------------------------------------*/
+      /* Match a negated single character caselessly. */
+
+      case OP_NOTI:
        if (clen > 0)
          {
-        unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
-        if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
+        unsigned int otherd;
+#ifdef SUPPORT_UTF
+        if (utf && d >= 128)
+          {
+#ifdef SUPPORT_UCP
+          otherd = UCD_OTHERCASE(d);
+#endif  /* SUPPORT_UCP */
+          }
+        else
+#endif  /* SUPPORT_UTF */
+        otherd = TABLE_GET(d, fcc, d);
+        if (c != d && c != otherd)
+          { ADD_NEW(state_offset + dlen + 1, 0); }
          }
        break;
  
        /*-----------------------------------------------------------------*/
+      case OP_PLUSI:
+      case OP_MINPLUSI:
+      case OP_POSPLUSI:
+      case OP_NOTPLUSI:
+      case OP_NOTMINPLUSI:
+      case OP_NOTPOSPLUSI:
+      caseless = TRUE;
+      codevalue -= OP_STARI - OP_STAR;
+
+      /* Fall through */
        case OP_PLUS:
        case OP_MINPLUS:
        case OP_POSPLUS:
@@ -2025,18 +2316,18 @@ for (;;)
        if (clen > 0)
          {
          unsigned int otherd = NOTACHAR;
-        if ((ims & PCRE_CASELESS) != 0)
+        if (caseless)
            {
-#ifdef SUPPORT_UTF8
-          if (utf8 && d >= 128)
+#ifdef SUPPORT_UTF
+          if (utf && d >= 128)
              {
  #ifdef SUPPORT_UCP
              otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
-#endif  /* SUPPORT_UTF8 */
-          otherd = fcc[d];
+#endif  /* SUPPORT_UTF */
+          otherd = TABLE_GET(d, fcc, d);
            }
          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
            {
@@ -2053,6 +2344,15 @@ for (;;)
        break;
  
        /*-----------------------------------------------------------------*/
+      case OP_QUERYI:
+      case OP_MINQUERYI:
+      case OP_POSQUERYI:
+      case OP_NOTQUERYI:
+      case OP_NOTMINQUERYI:
+      case OP_NOTPOSQUERYI:
+      caseless = TRUE;
+      codevalue -= OP_STARI - OP_STAR;
+      /* Fall through */
        case OP_QUERY:
        case OP_MINQUERY:
        case OP_POSQUERY:
@@ -2063,18 +2363,18 @@ for (;;)
        if (clen > 0)
          {
          unsigned int otherd = NOTACHAR;
-        if ((ims & PCRE_CASELESS) != 0)
+        if (caseless)
            {
-#ifdef SUPPORT_UTF8
-          if (utf8 && d >= 128)
+#ifdef SUPPORT_UTF
+          if (utf && d >= 128)
              {
  #ifdef SUPPORT_UCP
              otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
-#endif  /* SUPPORT_UTF8 */
-          otherd = fcc[d];
+#endif  /* SUPPORT_UTF */
+          otherd = TABLE_GET(d, fcc, d);
            }
          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
            {
@@ -2089,6 +2389,15 @@ for (;;)
        break;
  
        /*-----------------------------------------------------------------*/
+      case OP_STARI:
+      case OP_MINSTARI:
+      case OP_POSSTARI:
+      case OP_NOTSTARI:
+      case OP_NOTMINSTARI:
+      case OP_NOTPOSSTARI:
+      caseless = TRUE;
+      codevalue -= OP_STARI - OP_STAR;
+      /* Fall through */
        case OP_STAR:
        case OP_MINSTAR:
        case OP_POSSTAR:
@@ -2099,18 +2408,18 @@ for (;;)
        if (clen > 0)
          {
          unsigned int otherd = NOTACHAR;
-        if ((ims & PCRE_CASELESS) != 0)
+        if (caseless)
            {
-#ifdef SUPPORT_UTF8
-          if (utf8 && d >= 128)
+#ifdef SUPPORT_UTF
+          if (utf && d >= 128)
              {
  #ifdef SUPPORT_UCP
              otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
-#endif  /* SUPPORT_UTF8 */
-          otherd = fcc[d];
+#endif  /* SUPPORT_UTF */
+          otherd = TABLE_GET(d, fcc, d);
            }
          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
            {
@@ -2125,29 +2434,34 @@ for (;;)
        break;
  
        /*-----------------------------------------------------------------*/
+      case OP_EXACTI:
+      case OP_NOTEXACTI:
+      caseless = TRUE;
+      codevalue -= OP_STARI - OP_STAR;
+      /* Fall through */
        case OP_EXACT:
        case OP_NOTEXACT:
        count = current_state->count;  /* Number already matched */
        if (clen > 0)
          {
          unsigned int otherd = NOTACHAR;
-        if ((ims & PCRE_CASELESS) != 0)
+        if (caseless)
            {
-#ifdef SUPPORT_UTF8
-          if (utf8 && d >= 128)
+#ifdef SUPPORT_UTF
+          if (utf && d >= 128)
              {
  #ifdef SUPPORT_UCP
              otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
-#endif  /* SUPPORT_UTF8 */
-          otherd = fcc[d];
+#endif  /* SUPPORT_UTF */
+          otherd = TABLE_GET(d, fcc, d);
            }
          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
            {
            if (++count >= GET2(code, 1))
-            { ADD_NEW(state_offset + dlen + 3, 0); }
+            { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
            else
              { ADD_NEW(state_offset, count); }
            }
@@ -2155,29 +2469,38 @@ for (;;)
        break;
  
        /*-----------------------------------------------------------------*/
+      case OP_UPTOI:
+      case OP_MINUPTOI:
+      case OP_POSUPTOI:
+      case OP_NOTUPTOI:
+      case OP_NOTMINUPTOI:
+      case OP_NOTPOSUPTOI:
+      caseless = TRUE;
+      codevalue -= OP_STARI - OP_STAR;
+      /* Fall through */
        case OP_UPTO:
        case OP_MINUPTO:
        case OP_POSUPTO:
        case OP_NOTUPTO:
        case OP_NOTMINUPTO:
        case OP_NOTPOSUPTO:
-      ADD_ACTIVE(state_offset + dlen + 3, 0);
+      ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
        count = current_state->count;  /* Number already matched */
        if (clen > 0)
          {
          unsigned int otherd = NOTACHAR;
-        if ((ims & PCRE_CASELESS) != 0)
+        if (caseless)
            {
-#ifdef SUPPORT_UTF8
-          if (utf8 && d >= 128)
+#ifdef SUPPORT_UTF
+          if (utf && d >= 128)
              {
  #ifdef SUPPORT_UCP
              otherd = UCD_OTHERCASE(d);
  #endif  /* SUPPORT_UCP */
              }
            else
-#endif  /* SUPPORT_UTF8 */
-          otherd = fcc[d];
+#endif  /* SUPPORT_UTF */
+          otherd = TABLE_GET(d, fcc, d);
            }
          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
            {
@@ -2187,7 +2510,7 @@ for (;;)
              next_active_state--;
              }
            if (++count >= GET2(code, 1))
-            { ADD_NEW(state_offset + dlen + 3, 0); }
+            { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
            else
              { ADD_NEW(state_offset, count); }
            }
@@ -2204,18 +2527,18 @@ for (;;)
          {
          BOOL isinclass = FALSE;
          int next_state_offset;
-        const uschar *ecode;
+        const pcre_uchar *ecode;
  
          /* For a simple class, there is always just a 32-byte table, and we
          can set isinclass from it. */
  
          if (codevalue != OP_XCLASS)
            {
-          ecode = code + 33;
+          ecode = code + 1 + (32 / sizeof(pcre_uchar));
            if (clen > 0)
              {
              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
-              ((code[1 + c/8] & (1 << (c&7))) != 0);
+              ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
              }
            }
  
@@ -2226,14 +2549,14 @@ for (;;)
          else
           {
           ecode = code + GET(code, 1);
-         if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);
+         if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
           }
  
          /* At this point, isinclass is set for all kinds of class, and ecode
          points to the byte after the end of the class. If there is a
          quantifier, this is where it will be. */
  
-        next_state_offset = ecode - start_code;
+        next_state_offset = (int)(ecode - start_code);
  
          switch (*ecode)
            {
@@ -2260,12 +2583,12 @@ for (;;)
            case OP_CRMINRANGE:
            count = current_state->count;  /* Already matched */
            if (count >= GET2(ecode, 1))
-            { ADD_ACTIVE(next_state_offset + 5, 0); }
+            { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
            if (isinclass)
              {
-            int max = GET2(ecode, 3);
+            int max = GET2(ecode, 1 + IMM2_SIZE);
              if (++count >= max && max != 0)   /* Max 0 => no limit */
-              { ADD_NEW(next_state_offset + 5, 0); }
+              { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
              else
                { ADD_NEW(state_offset, count); }
              }
@@ -2296,7 +2619,7 @@ for (;;)
          int rc;
          int local_offsets[2];
          int local_workspace[1000];
-        const uschar *endasscode = code + GET(code, 1);
+        const pcre_uchar *endasscode = code + GET(code, 1);
  
          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
  
@@ -2304,18 +2627,16 @@ for (;;)
            md,                                   /* static match data */
            code,                                 /* this subexpression's code */
            ptr,                                  /* where we currently are */
-          ptr - start_subject,                  /* start offset */
+          (int)(ptr - start_subject),           /* start offset */
            local_offsets,                        /* offset vector */
            sizeof(local_offsets)/sizeof(int),    /* size of same */
            local_workspace,                      /* workspace vector */
            sizeof(local_workspace)/sizeof(int),  /* size of same */
-          ims,                                  /* the current ims flags */
-          rlevel,                               /* function recursion level */
-          recursing);                           /* pass on regex recursion */
+          rlevel);                              /* function recursion level */
  
          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
-            { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
+            { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
          }
        break;
  
@@ -2335,25 +2656,30 @@ for (;;)
          if (code[LINK_SIZE+1] == OP_CALLOUT)
            {
            rrc = 0;
-          if (pcre_callout != NULL)
+          if (PUBL(callout) != NULL)
              {
-            pcre_callout_block cb;
+            PUBL(callout_block) cb;
              cb.version          = 1;   /* Version 1 of the callout block */
              cb.callout_number   = code[LINK_SIZE+2];
              cb.offset_vector    = offsets;
+#ifdef COMPILE_PCRE8
              cb.subject          = (PCRE_SPTR)start_subject;
-            cb.subject_length   = end_subject - start_subject;
-            cb.start_match      = current_subject - start_subject;
-            cb.current_position = ptr - start_subject;
+#else
+            cb.subject          = (PCRE_SPTR16)start_subject;
+#endif
+            cb.subject_length   = (int)(end_subject - start_subject);
+            cb.start_match      = (int)(current_subject - start_subject);
+            cb.current_position = (int)(ptr - start_subject);
              cb.pattern_position = GET(code, LINK_SIZE + 3);
              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
              cb.capture_top      = 1;
              cb.capture_last     = -1;
              cb.callout_data     = md->callout_data;
-            if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
+            cb.mark             = NULL;   /* No (*MARK) support */
+            if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
              }
            if (rrc > 0) break;                      /* Fail this thread */
-          code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */
+          code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
            }
  
          condcode = code[LINK_SIZE+1];
@@ -2374,10 +2700,10 @@ for (;;)
  
          else if (condcode == OP_RREF || condcode == OP_NRREF)
            {
-          int value = GET2(code, LINK_SIZE+2);
+          int value = GET2(code, LINK_SIZE + 2);
            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
-          if (recursing > 0)
-            { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
+          if (md->recursive != NULL)
+            { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
            }
  
@@ -2386,8 +2712,8 @@ for (;;)
          else
            {
            int rc;
-          const uschar *asscode = code + LINK_SIZE + 1;
-          const uschar *endasscode = asscode + GET(asscode, 1);
+          const pcre_uchar *asscode = code + LINK_SIZE + 1;
+          const pcre_uchar *endasscode = asscode + GET(asscode, 1);
  
            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
  
@@ -2395,19 +2721,17 @@ for (;;)
              md,                                   /* fixed match data */
              asscode,                              /* this subexpression's code */
              ptr,                                  /* where we currently are */
-            ptr - start_subject,                  /* start offset */
+            (int)(ptr - start_subject),           /* start offset */
              local_offsets,                        /* offset vector */
              sizeof(local_offsets)/sizeof(int),    /* size of same */
              local_workspace,                      /* workspace vector */
              sizeof(local_workspace)/sizeof(int),  /* size of same */
-            ims,                                  /* the current ims flags */
-            rlevel,                               /* function recursion level */
-            recursing);                           /* pass on regex recursion */
+            rlevel);                              /* function recursion level */
  
            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
            if ((rc >= 0) ==
                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
-            { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
+            { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
            else
              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
            }
@@ -2417,28 +2741,47 @@ for (;;)
        /*-----------------------------------------------------------------*/
        case OP_RECURSE:
          {
+        dfa_recursion_info *ri;
          int local_offsets[1000];
          int local_workspace[1000];
+        const pcre_uchar *callpat = start_code + GET(code, 1);
+        int recno = (callpat == md->start_code)? 0 :
+          GET2(callpat, 1 + LINK_SIZE);
          int rc;
  
-        DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,
-          recursing + 1));
+        DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
+
+        /* Check for repeating a recursion without advancing the subject
+        pointer. This should catch convoluted mutual recursions. (Some simple
+        cases are caught at compile time.) */
+
+        for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
+          if (recno == ri->group_num && ptr == ri->subject_position)
+            return PCRE_ERROR_RECURSELOOP;
+
+        /* Remember this recursion and where we started it so as to
+        catch infinite loops. */
+
+        new_recursive.group_num = recno;
+        new_recursive.subject_position = ptr;
+        new_recursive.prevrec = md->recursive;
+        md->recursive = &new_recursive;
  
          rc = internal_dfa_exec(
            md,                                   /* fixed match data */
-          start_code + GET(code, 1),            /* this subexpression's code */
+          callpat,                              /* this subexpression's code */
            ptr,                                  /* where we currently are */
-          ptr - start_subject,                  /* start offset */
+          (int)(ptr - start_subject),           /* start offset */
            local_offsets,                        /* offset vector */
            sizeof(local_offsets)/sizeof(int),    /* size of same */
            local_workspace,                      /* workspace vector */
            sizeof(local_workspace)/sizeof(int),  /* size of same */
-          ims,                                  /* the current ims flags */
-          rlevel,                               /* function recursion level */
-          recursing + 1);                       /* regex recurse level */
+          rlevel);                              /* function recursion level */
+
+        md->recursive = new_recursive.prevrec;  /* Done this recursion */
  
-        DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,
-          recursing + 1, rc));
+        DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
+          rc));
  
          /* Ran out of internal offsets */
  
@@ -2452,10 +2795,15 @@ for (;;)
            {
            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
              {
-            const uschar *p = start_subject + local_offsets[rc];
-            const uschar *pp = start_subject + local_offsets[rc+1];
              int charcount = local_offsets[rc+1] - local_offsets[rc];
-            while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+#ifdef SUPPORT_UTF
+            if (utf)
+              {
+              const pcre_uchar *p = start_subject + local_offsets[rc];
+              const pcre_uchar *pp = start_subject + local_offsets[rc+1];
+              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+              }
+#endif
              if (charcount > 0)
                {
                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
@@ -2471,7 +2819,99 @@ for (;;)
        break;
  
        /*-----------------------------------------------------------------*/
+      case OP_BRAPOS:
+      case OP_SBRAPOS:
+      case OP_CBRAPOS:
+      case OP_SCBRAPOS:
+      case OP_BRAPOSZERO:
+        {
+        int charcount, matched_count;
+        const pcre_uchar *local_ptr = ptr;
+        BOOL allow_zero;
+
+        if (codevalue == OP_BRAPOSZERO)
+          {
+          allow_zero = TRUE;
+          codevalue = *(++code);  /* Codevalue will be one of above BRAs */
+          }
+        else allow_zero = FALSE;
+
+        /* Loop to match the subpattern as many times as possible as if it were
+        a complete pattern. */
+
+        for (matched_count = 0;; matched_count++)
+          {
+          int local_offsets[2];
+          int local_workspace[1000];
+
+          int rc = internal_dfa_exec(
+            md,                                   /* fixed match data */
+            code,                                 /* this subexpression's code */
+            local_ptr,                            /* where we currently are */
+            (int)(ptr - start_subject),           /* start offset */
+            local_offsets,                        /* offset vector */
+            sizeof(local_offsets)/sizeof(int),    /* size of same */
+            local_workspace,                      /* workspace vector */
+            sizeof(local_workspace)/sizeof(int),  /* size of same */
+            rlevel);                              /* function recursion level */
+
+          /* Failed to match */
+
+          if (rc < 0)
+            {
+            if (rc != PCRE_ERROR_NOMATCH) return rc;
+            break;
+            }
+
+          /* Matched: break the loop if zero characters matched. */
+
+          charcount = local_offsets[1] - local_offsets[0];
+          if (charcount == 0) break;
+          local_ptr += charcount;    /* Advance temporary position ptr */
+          }
+
+        /* At this point we have matched the subpattern matched_count
+        times, and local_ptr is pointing to the character after the end of the
+        last match. */
+
+        if (matched_count > 0 || allow_zero)
+          {
+          const pcre_uchar *end_subpattern = code;
+          int next_state_offset;
+
+          do { end_subpattern += GET(end_subpattern, 1); }
+            while (*end_subpattern == OP_ALT);
+          next_state_offset =
+            (int)(end_subpattern - start_code + LINK_SIZE + 1);
+
+          /* Optimization: if there are no more active states, and there
+          are no new states yet set up, then skip over the subject string
+          right here, to save looping. Otherwise, set up the new state to swing
+          into action when the end of the matched substring is reached. */
+
+          if (i + 1 >= active_count && new_count == 0)
+            {
+            ptr = local_ptr;
+            clen = 0;
+            ADD_NEW(next_state_offset, 0);
+            }
+          else
+            {
+            const pcre_uchar *p = ptr;
+            const pcre_uchar *pp = local_ptr;
+            charcount = (int)(pp - p);
+#ifdef SUPPORT_UTF
+            if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+#endif
+            ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
+            }
+          }
+        }
+      break;
+
+      /*-----------------------------------------------------------------*/
        case OP_ONCE:
+      case OP_ONCE_NC:
          {
          int local_offsets[2];
          int local_workspace[1000];
@@ -2480,24 +2920,23 @@ for (;;)
            md,                                   /* fixed match data */
            code,                                 /* this subexpression's code */
            ptr,                                  /* where we currently are */
-          ptr - start_subject,                  /* start offset */
+          (int)(ptr - start_subject),           /* start offset */
            local_offsets,                        /* offset vector */
            sizeof(local_offsets)/sizeof(int),    /* size of same */
            local_workspace,                      /* workspace vector */
            sizeof(local_workspace)/sizeof(int),  /* size of same */
-          ims,                                  /* the current ims flags */
-          rlevel,                               /* function recursion level */
-          recursing);                           /* pass on regex recursion */
+          rlevel);                              /* function recursion level */
  
          if (rc >= 0)
            {
-          const uschar *end_subpattern = code;
+          const pcre_uchar *end_subpattern = code;
            int charcount = local_offsets[1] - local_offsets[0];
            int next_state_offset, repeat_state_offset;
  
            do { end_subpattern += GET(end_subpattern, 1); }
              while (*end_subpattern == OP_ALT);
-          next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;
+          next_state_offset =
+            (int)(end_subpattern - start_code + LINK_SIZE + 1);
  
            /* If the end of this subpattern is KETRMAX or KETRMIN, we must
            arrange for the repeat state also to be added to the relevant list.
@@ -2505,7 +2944,7 @@ for (;;)
  
            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
                                   *end_subpattern == OP_KETRMIN)?
-            end_subpattern - start_code - GET(end_subpattern, 1) : -1;
+            (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
  
            /* If we have matched an empty string, add the next state at the
            current character pointer. This is important so that the duplicate
@@ -2520,7 +2959,7 @@ for (;;)
            /* Optimization: if there are no more active states, and there
            are no new states yet set up, then skip over the subject string
            right here, to save looping. Otherwise, set up the new state to swing
-          into action when the end of the substring is reached. */
+          into action when the end of the matched substring is reached. */
  
            else if (i + 1 >= active_count && new_count == 0)
              {
@@ -2543,14 +2982,18 @@ for (;;)
              }
            else
              {
-            const uschar *p = start_subject + local_offsets[0];
-            const uschar *pp = start_subject + local_offsets[1];
-            while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+#ifdef SUPPORT_UTF
+            if (utf)
+              {
+              const pcre_uchar *p = start_subject + local_offsets[0];
+              const pcre_uchar *pp = start_subject + local_offsets[1];
+              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+              }
+#endif
              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
              if (repeat_state_offset >= 0)
                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
              }
-
            }
          else if (rc != PCRE_ERROR_NOMATCH) return rc;
          }
@@ -2562,25 +3005,30 @@ for (;;)
  
        case OP_CALLOUT:
        rrc = 0;
-      if (pcre_callout != NULL)
+      if (PUBL(callout) != NULL)
          {
-        pcre_callout_block cb;
+        PUBL(callout_block) cb;
          cb.version          = 1;   /* Version 1 of the callout block */
          cb.callout_number   = code[1];
          cb.offset_vector    = offsets;
+#ifdef COMPILE_PCRE8
          cb.subject          = (PCRE_SPTR)start_subject;
-        cb.subject_length   = end_subject - start_subject;
-        cb.start_match      = current_subject - start_subject;
-        cb.current_position = ptr - start_subject;
+#else
+        cb.subject          = (PCRE_SPTR16)start_subject;
+#endif
+        cb.subject_length   = (int)(end_subject - start_subject);
+        cb.start_match      = (int)(current_subject - start_subject);
+        cb.current_position = (int)(ptr - start_subject);
          cb.pattern_position = GET(code, 2);
          cb.next_item_length = GET(code, 2 + LINK_SIZE);
          cb.capture_top      = 1;
          cb.capture_last     = -1;
          cb.callout_data     = md->callout_data;
-        if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
+        cb.mark             = NULL;   /* No (*MARK) support */
+        if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
          }
        if (rrc == 0)
-        { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
+        { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
        break;
  
  
@@ -2609,7 +3057,7 @@ for (;;)
    if (new_count <= 0)
      {
      if (rlevel == 1 &&                               /* Top level, and */
-        could_continue &&                            /* Some could go on */
+        could_continue &&                            /* Some could go on, and */
          forced_fail != workspace[1] &&               /* Not all forced fail & */
          (                                            /* either... */
          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
@@ -2617,13 +3065,18 @@ for (;;)
          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
           match_count < 0)                            /* no matches */
          ) &&                                         /* And... */
-        ptr >= end_subject &&                     /* Reached end of subject */
-        ptr > current_subject)                    /* Matched non-empty string */
+        (
+        partial_newline ||                           /* Either partial NL */
+          (                                          /* or ... */
+          ptr >= end_subject &&                /* End of subject and */
+          ptr > md->start_used_ptr)            /* Inspected non-empty string */
+          )
+        )
        {
        if (offsetcount >= 2)
          {
-        offsets[0] = md->start_used_ptr - start_subject;
-        offsets[1] = end_subject - start_subject;
+        offsets[0] = (int)(md->start_used_ptr - start_subject);
+        offsets[1] = (int)(end_subject - start_subject);
          }
        match_count = PCRE_ERROR_PARTIAL;
        }
@@ -2677,28 +3130,33 @@ Returns:          > 0 => number of match offset pairs placed in offsets
                   < -1 => some kind of unexpected problem
  */
  
+#ifdef COMPILE_PCRE8
  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
    const char *subject, int length, int start_offset, int options, int *offsets,
    int offsetcount, int *workspace, int wscount)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
+  PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
+  int offsetcount, int *workspace, int wscount)
+#endif
  {
-real_pcre *re = (real_pcre *)argument_re;
+REAL_PCRE *re = (REAL_PCRE *)argument_re;
  dfa_match_data match_block;
  dfa_match_data *md = &match_block;
-BOOL utf8, anchored, startline, firstline;
-const uschar *current_subject, *end_subject, *lcc;
-
-pcre_study_data internal_study;
+BOOL utf, anchored, startline, firstline;
+const pcre_uchar *current_subject, *end_subject;
  const pcre_study_data *study = NULL;
-real_pcre internal_re;
-
-const uschar *req_byte_ptr;
-const uschar *start_bits = NULL;
-BOOL first_byte_caseless = FALSE;
-BOOL req_byte_caseless = FALSE;
-int first_byte = -1;
-int req_byte = -1;
-int req_byte2 = -1;
+
+const pcre_uchar *req_char_ptr;
+const pcre_uint8 *start_bits = NULL;
+BOOL has_first_char = FALSE;
+BOOL has_req_char = FALSE;
+pcre_uchar first_char = 0;
+pcre_uchar first_char2 = 0;
+pcre_uchar req_char = 0;
+pcre_uchar req_char2 = 0;
  int newline;
  
  /* Plausibility checks */
@@ -2708,11 +3166,29 @@ if (re == NULL || subject == NULL || workspace == NULL ||
     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
+if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
  
-/* We need to find the pointer to any study data before we test for byte
-flipping, so we scan the extra_data block first. This may set two fields in the
-match block, so we must initialize them beforehand. However, the other fields
-in the match block must not be set until after the byte flipping. */
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */
+
+if (re->magic_number != MAGIC_NUMBER)
+  return re->magic_number == REVERSED_MAGIC_NUMBER?
+    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+
+/* If restarting after a partial match, do some sanity checks on the contents
+of the workspace. */
+
+if ((options & PCRE_DFA_RESTART) != 0)
+  {
+  if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
+    workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
+      return PCRE_ERROR_DFA_BADRESTART;
+  }
+
+/* Set up study, callout, and table data */
  
  md->tables = re->tables;
  md->callout_data = NULL;
@@ -2731,28 +3207,17 @@ if (extra_data != NULL)
      md->tables = extra_data->tables;
    }
  
-/* Check that the first field in the block is the magic number. If it is not,
-test for a regex that was compiled on a host of opposite endianness. If this is
-the case, flipped values are put in internal_re and internal_study if there was
-study data too. */
-
-if (re->magic_number != MAGIC_NUMBER)
-  {
-  re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
-  if (re == NULL) return PCRE_ERROR_BADMAGIC;
-  if (study != NULL) study = &internal_study;
-  }
-
  /* Set some local values */
  
-current_subject = (const unsigned char *)subject + start_offset;
-end_subject = (const unsigned char *)subject + length;
-req_byte_ptr = current_subject - 1;
+current_subject = (const pcre_uchar *)subject + start_offset;
+end_subject = (const pcre_uchar *)subject + length;
+req_char_ptr = current_subject - 1;
  
-#ifdef SUPPORT_UTF8
-utf8 = (re->options & PCRE_UTF8) != 0;
+#ifdef SUPPORT_UTF
+/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+utf = (re->options & PCRE_UTF8) != 0;
  #else
-utf8 = FALSE;
+utf = FALSE;
  #endif
  
  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
@@ -2760,9 +3225,9 @@ anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
  
  /* The remaining fixed data for passing around. */
  
-md->start_code = (const uschar *)argument_re +
+md->start_code = (const pcre_uchar *)argument_re +
      re->name_table_offset + re->name_count * re->name_entry_size;
-md->start_subject = (const unsigned char *)subject;
+md->start_subject = (const pcre_uchar *)subject;
  md->end_subject = end_subject;
  md->start_offset = start_offset;
  md->moptions = options;
@@ -2823,20 +3288,24 @@ else
  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
  back the character offset. */
  
-#ifdef SUPPORT_UTF8
-if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
+#ifdef SUPPORT_UTF
+if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
    {
-  if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
-    return PCRE_ERROR_BADUTF8;
-  if (start_offset > 0 && start_offset < length)
+  int erroroffset;
+  int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
+  if (errorcode != 0)
      {
-    int tb = ((uschar *)subject)[start_offset];
-    if (tb > 127)
+    if (offsetcount >= 2)
        {
-      tb &= 0xc0;
-      if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
+      offsets[0] = erroroffset;
+      offsets[1] = errorcode;
        }
+    return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
+      PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
      }
+  if (start_offset > 0 && start_offset < length &&
+        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
+    return PCRE_ERROR_BADUTF8_OFFSET;
    }
  #endif
  
@@ -2844,12 +3313,11 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
  is a feature that makes it possible to save compiled regex and re-use them
  in other programs later. */
  
-if (md->tables == NULL) md->tables = _pcre_default_tables;
+if (md->tables == NULL) md->tables = PRIV(default_tables);
  
-/* The lower casing table and the "must be at the start of a line" flag are
-used in a loop when finding where to start. */
+/* The "must be at the start of a line" flags are used in a loop when finding
+where to start. */
  
-lcc = md->tables + lcc_offset;
  startline = (re->flags & PCRE_STARTLINE) != 0;
  firstline = (re->options & PCRE_FIRSTLINE) != 0;
  
@@ -2863,9 +3331,16 @@ if (!anchored)
    {
    if ((re->flags & PCRE_FIRSTSET) != 0)
      {
-    first_byte = re->first_byte & 255;
-    if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
-      first_byte = lcc[first_byte];
+    has_first_char = TRUE;
+    first_char = first_char2 = (pcre_uchar)(re->first_char);
+    if ((re->flags & PCRE_FCH_CASELESS) != 0)
+      {
+      first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+      if (utf && first_char > 127)
+        first_char2 = UCD_OTHERCASE(first_char);
+#endif
+      }
      }
    else
      {
@@ -2880,9 +3355,16 @@ character" set. */
  
  if ((re->flags & PCRE_REQCHSET) != 0)
    {
-  req_byte = re->req_byte & 255;
-  req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
-  req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
+  has_req_char = TRUE;
+  req_char = req_char2 = (pcre_uchar)(re->req_char);
+  if ((re->flags & PCRE_RCH_CASELESS) != 0)
+    {
+    req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+    if (utf && req_char > 127)
+      req_char2 = UCD_OTHERCASE(req_char);
+#endif
+    }
    }
  
  /* Call the main matching function, looping for a non-anchored regex after a
@@ -2895,7 +3377,7 @@ for (;;)
  
    if ((options & PCRE_DFA_RESTART) == 0)
      {
-    const uschar *save_end_subject = end_subject;
+    const pcre_uchar *save_end_subject = end_subject;
  
      /* If firstline is TRUE, the start of the match is constrained to the first
      line of a multiline string. Implement this by temporarily adjusting
@@ -2904,14 +3386,14 @@ for (;;)
  
      if (firstline)
        {
-      USPTR t = current_subject;
-#ifdef SUPPORT_UTF8
-      if (utf8)
+      PCRE_PUCHAR t = current_subject;
+#ifdef SUPPORT_UTF
+      if (utf)
          {
          while (t < md->end_subject && !IS_NEWLINE(t))
            {
            t++;
-          while (t < end_subject && (*t & 0xc0) == 0x80) t++;
+          ACROSSCHAR(t < end_subject, *t, t++);
            }
          }
        else
@@ -2922,21 +3404,23 @@ for (;;)
  
      /* There are some optimizations that avoid running the match if a known
      starting point is not found. However, there is an option that disables
-    these, for testing and for ensuring that all callouts do actually occur. */
+    these, for testing and for ensuring that all callouts do actually occur.
+    The option can be set in the regex by (*NO_START_OPT) or passed in
+    match-time options. */
  
-    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
+    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
        {
-      /* Advance to a known first byte. */
+      /* Advance to a known first char. */
  
-      if (first_byte >= 0)
+      if (has_first_char)
          {
-        if (first_byte_caseless)
+        if (first_char != first_char2)
            while (current_subject < end_subject &&
-                 lcc[*current_subject] != first_byte)
+              *current_subject != first_char && *current_subject != first_char2)
              current_subject++;
          else
            while (current_subject < end_subject &&
-                 *current_subject != first_byte)
+                 *current_subject != first_char)
              current_subject++;
          }
  
@@ -2946,16 +3430,15 @@ for (;;)
          {
          if (current_subject > md->start_subject + start_offset)
            {
-#ifdef SUPPORT_UTF8
-          if (utf8)
+#ifdef SUPPORT_UTF
+          if (utf)
              {
              while (current_subject < end_subject &&
                     !WAS_NEWLINE(current_subject))
                {
                current_subject++;
-              while(current_subject < end_subject &&
-                    (*current_subject & 0xc0) == 0x80)
-                current_subject++;
+              ACROSSCHAR(current_subject < end_subject, *current_subject,
+                current_subject++);
                }
              }
            else
@@ -2981,9 +3464,22 @@ for (;;)
          {
          while (current_subject < end_subject)
            {
-          register unsigned int c = *current_subject;
-          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
-            else break;
+          unsigned int c = *current_subject;
+#ifndef COMPILE_PCRE8
+          if (c > 255) c = 255;
+#endif
+          if ((start_bits[c/8] & (1 << (c&7))) == 0)
+            {
+            current_subject++;
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+            /* In non 8-bit mode, the iteration will stop for
+            characters > 255 at the beginning or not stop at all. */
+            if (utf)
+              ACROSSCHAR(current_subject < end_subject, *current_subject,
+                current_subject++);
+#endif
+            }
+          else break;
            }
          }
        }
@@ -2996,7 +3492,7 @@ for (;;)
      disabling is explicitly requested (and of course, by the test above, this
      code is not obeyed when restarting after a partial match). */
  
-    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
+    if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
        {
        /* If the pattern was studied, a minimum subject length may be set. This
@@ -3008,8 +3504,8 @@ for (;;)
            (pcre_uint32)(end_subject - current_subject) < study->minlength)
          return PCRE_ERROR_NOMATCH;
  
-      /* If req_byte is set, we know that that character must appear in the
-      subject for the match to succeed. If the first character is set, req_byte
+      /* If req_char is set, we know that that character must appear in the
+      subject for the match to succeed. If the first character is set, req_char
        must be later in the subject; otherwise the test starts at the match
        point. This optimization can save a huge amount of work in patterns with
        nested unlimited repeats that aren't going to match. Writing separate
@@ -3021,28 +3517,28 @@ for (;;)
        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
        string... so we don't do this when the string is sufficiently long. */
  
-      if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
+      if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
          {
-        register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
+        PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
  
          /* We don't need to repeat the search if we haven't yet reached the
          place we found it at last time. */
  
-        if (p > req_byte_ptr)
+        if (p > req_char_ptr)
            {
-          if (req_byte_caseless)
+          if (req_char != req_char2)
              {
              while (p < end_subject)
                {
-              register int pp = *p++;
-              if (pp == req_byte || pp == req_byte2) { p--; break; }
+              int pp = *p++;
+              if (pp == req_char || pp == req_char2) { p--; break; }
                }
              }
            else
              {
              while (p < end_subject)
                {
-              if (*p++ == req_byte) { p--; break; }
+              if (*p++ == req_char) { p--; break; }
                }
              }
  
@@ -3055,7 +3551,7 @@ for (;;)
            found it, so that we don't search again next time round the loop if
            the start hasn't passed this character yet. */
  
-          req_byte_ptr = p;
+          req_char_ptr = p;
            }
          }
        }
@@ -3064,6 +3560,7 @@ for (;;)
    /* OK, now we can do the business */
  
    md->start_used_ptr = current_subject;
+  md->recursive = NULL;
  
    rc = internal_dfa_exec(
      md,                                /* fixed match data */
@@ -3074,9 +3571,7 @@ for (;;)
      offsetcount,                       /* size of same */
      workspace,                         /* workspace vector */
      wscount,                           /* size of same */
-    re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
-    0,                                 /* function recurse level */
-    0);                                /* regex recurse level */
+    0);                                /* function recurse level */
  
    /* Anything other than "no match" means we are done, always; otherwise, carry
    on only if not anchored. */
@@ -3088,11 +3583,13 @@ for (;;)
  
    if (firstline && IS_NEWLINE(current_subject)) break;
    current_subject++;
-  if (utf8)
+#ifdef SUPPORT_UTF
+  if (utf)
      {
-    while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
-      current_subject++;
+    ACROSSCHAR(current_subject < end_subject, *current_subject,
+      current_subject++);
      }
+#endif
    if (current_subject > end_subject) break;
  
    /* If we have just passed a CR and we are now at a LF, and the pattern does