-----------------------------------------------------------------------------
*/
-
/* This module contains the external function pcre_dfa_exec(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true
-FSM). This is NOT Perl- compatible, but it has advantages in certain
+FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */
int data; /* Some use extra data */
} stateblock;
-#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
+/* Number of ints occupied by one stateblock, as a signed int. The whole
+expansion is parenthesized so that it always acts as a single operand. */
+#define INTS_PER_STATEBLOCK ((int)(sizeof(stateblock)/sizeof(int)))
#ifdef PCRE_DEBUG
next_new_state->count = (y); \
next_new_state->data = (z); \
next_new_state++; \
- DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
+ DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
+ (x), (y), (z), __LINE__)); \
} \
else return PCRE_ERROR_DFA_WSSIZE
BOOL utf = FALSE;
#endif
+BOOL reset_could_continue = FALSE;
+
rlevel++;
offsetcount &= (-2);
int clen, dlen;
unsigned int c, d;
int forced_fail = 0;
- BOOL could_continue = FALSE;
+ BOOL partial_newline = FALSE;
+ BOOL could_continue = reset_could_continue;
+ reset_could_continue = FALSE;
/* Make the new state list into the active state list and empty the
new state list. */
if (ptr < end_subject)
{
- clen = 1; /* Number of bytes in the character */
+ clen = 1; /* Number of data items in the character */
#ifdef SUPPORT_UTF
if (utf) { GETCHARLEN(c, ptr, clen); } else
#endif /* SUPPORT_UTF */
/* A negative offset is a special case meaning "hold off going to this
(negated) state until the number of characters in the data field have
- been skipped". */
+ been skipped". If the could_continue flag was passed over from a previous
+ state, arrange for it to be passed on. */
if (state_offset < 0)
{
DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
ADD_NEW_DATA(state_offset, current_state->count,
current_state->data - 1);
+ if (could_continue) reset_could_continue = TRUE;
continue;
}
else
permitted.
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
- argument that is not a data character - but is always one byte long. We
- have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
- this case. To keep the other cases fast, convert these ones to new opcodes.
- */
+ argument that is not a data character - but is always one byte long because
+ the values are small. We have to take special action to deal with \P, \p,
+ \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
+ these ones to new opcodes. */
if (coptable[codevalue] > 0)
{
offsets[0] = (int)(current_subject - start_subject);
offsets[1] = (int)(ptr - start_subject);
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
- offsets[1] - offsets[0], current_subject));
+ offsets[1] - offsets[0], (char *)current_subject));
}
if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
{
/*-----------------------------------------------------------------*/
case OP_ANY:
if (clen > 0 && !IS_NEWLINE(ptr))
- { ADD_NEW(state_offset + 1, 0); }
+ {
+ if (ptr + 1 >= md->end_subject &&
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ c == NLBLOCK->nl[0])
+ {
+ could_continue = partial_newline = TRUE;
+ }
+ else
+ {
+ ADD_NEW(state_offset + 1, 0);
+ }
+ }
break;
/*-----------------------------------------------------------------*/
(ptr == end_subject - md->nllen)
))
{ ADD_ACTIVE(state_offset + 1, 0); }
+ else if (ptr + 1 >= md->end_subject &&
+ (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ c == NLBLOCK->nl[0])
+ {
+ if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+ {
+ reset_could_continue = TRUE;
+ ADD_NEW_DATA(-(state_offset + 1), 0, 1);
+ }
+ else could_continue = partial_newline = TRUE;
+ }
}
break;
else if (clen == 0 ||
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
{ ADD_ACTIVE(state_offset + 1, 0); }
+ else if (ptr + 1 >= md->end_subject &&
+ (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ c == NLBLOCK->nl[0])
+ {
+ if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+ {
+ reset_could_continue = TRUE;
+ ADD_NEW_DATA(-(state_offset + 1), 0, 1);
+ }
+ else could_continue = partial_newline = TRUE;
+ }
}
else if (IS_NEWLINE(ptr))
{ ADD_ACTIVE(state_offset + 1, 0); }
if (clen > 0)
{
BOOL OK;
- const ucd_record * prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[1])
{
case PT_ANY:
break;
case PT_LAMP:
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt;
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+ chartype == ucp_Lt;
break;
case PT_GC:
- OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
+ OK = PRIV(ucp_gentype)[chartype] == code[2];
break;
case PT_PC:
- OK = prop->chartype == code[2];
+ OK = chartype == code[2];
break;
case PT_SC:
- OK = prop->script == code[2];
+ OK = UCD_SCRIPT(c) == code[2];
break;
/* These are specials for combination cases. */
case PT_ALNUM:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
break;
case PT_WORD:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
{
- if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ c == NLBLOCK->nl[0])
+ {
+ could_continue = partial_newline = TRUE;
+ }
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
- if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ c == NLBLOCK->nl[0])
+ {
+ could_continue = partial_newline = TRUE;
+ }
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
- if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ c == NLBLOCK->nl[0])
+ {
+ could_continue = partial_newline = TRUE;
+ }
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
count = current_state->count; /* Number already matched */
if (clen > 0)
{
- if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ c == NLBLOCK->nl[0])
+ {
+ could_continue = partial_newline = TRUE;
+ }
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
count = current_state->count; /* Number already matched */
if (clen > 0)
{
- if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
+ if (d == OP_ANY && ptr + 1 >= md->end_subject &&
+ (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ c == NLBLOCK->nl[0])
+ {
+ could_continue = partial_newline = TRUE;
+ }
+ else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
if (clen > 0)
{
BOOL OK;
- const ucd_record * prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
break;
case PT_LAMP:
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt;
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+ chartype == ucp_Lt;
break;
case PT_GC:
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
+ OK = PRIV(ucp_gentype)[chartype] == code[3];
break;
case PT_PC:
- OK = prop->chartype == code[3];
+ OK = chartype == code[3];
break;
case PT_SC:
- OK = prop->script == code[3];
+ OK = UCD_SCRIPT(c) == code[3];
break;
/* These are specials for combination cases. */
case PT_ALNUM:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
break;
case PT_WORD:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
if (clen > 0)
{
BOOL OK;
- const ucd_record * prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
break;
case PT_LAMP:
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt;
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+ chartype == ucp_Lt;
break;
case PT_GC:
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
+ OK = PRIV(ucp_gentype)[chartype] == code[3];
break;
case PT_PC:
- OK = prop->chartype == code[3];
+ OK = chartype == code[3];
break;
case PT_SC:
- OK = prop->script == code[3];
+ OK = UCD_SCRIPT(c) == code[3];
break;
/* These are specials for combination cases. */
case PT_ALNUM:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
break;
case PT_WORD:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
if (clen > 0)
{
BOOL OK;
- const ucd_record * prop = GET_UCD(c);
+ const pcre_uint8 chartype = UCD_CHARTYPE(c);
switch(code[1 + IMM2_SIZE + 1])
{
case PT_ANY:
break;
case PT_LAMP:
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt;
+ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+ chartype == ucp_Lt;
break;
case PT_GC:
- OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
+ OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
break;
case PT_PC:
- OK = prop->chartype == code[1 + IMM2_SIZE + 2];
+ OK = chartype == code[1 + IMM2_SIZE + 2];
break;
case PT_SC:
- OK = prop->script == code[1 + IMM2_SIZE + 2];
+ OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
break;
/* These are specials for combination cases. */
case PT_ALNUM:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
case PT_PXSPACE: /* POSIX space */
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
break;
case PT_WORD:
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
ncount++;
nptr += ndlen;
}
+ if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+ reset_could_continue = TRUE;
if (++count >= GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
ncount++;
nptr += nclen;
}
+ if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
+ reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
}
break;
break;
case 0x000d:
- if (ptr + 1 < end_subject && ptr[1] == 0x0a)
+ if (ptr + 1 >= end_subject)
+ {
+ ADD_NEW(state_offset + 1, 0);
+ if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
+ reset_could_continue = TRUE;
+ }
+ else if (ptr[1] == 0x0a)
{
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
break;
/*-----------------------------------------------------------------*/
- /* Match a negated single character casefully. This is only used for
- one-byte characters, that is, we know that d < 256. The character we are
- checking (c) can be multibyte. */
+ /* Match a negated single character casefully. */
case OP_NOT:
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
break;
/*-----------------------------------------------------------------*/
- /* Match a negated single character caselessly. This is only used for
- one-byte characters, that is, we know that d < 256. The character we are
- checking (c) can be multibyte. */
+ /* Match a negated single character caselessly. */
case OP_NOTI:
- if (clen > 0 && c != d && c != fcc[d])
- { ADD_NEW(state_offset + dlen + 1, 0); }
+ if (clen > 0)
+ {
+ /* otherd is the other-case form of d; the next state is added only
+ when the subject character c differs from both d and otherd. */
+ unsigned int otherd;
+#ifdef SUPPORT_UTF
+ if (utf && d >= 128)
+ {
+#ifdef SUPPORT_UCP
+ otherd = UCD_OTHERCASE(d);
+#else
+ /* No other-case lookup is available in this branch without UCP, so
+ treat d as its own other case. This also ensures that otherd is never
+ read while uninitialized. */
+ otherd = d;
+#endif /* SUPPORT_UCP */
+ }
+ else
+#endif /* SUPPORT_UTF */
+ otherd = TABLE_GET(d, fcc, d);
+ if (c != d && c != otherd)
+ { ADD_NEW(state_offset + dlen + 1, 0); }
+ }
break;
/*-----------------------------------------------------------------*/
{
int charcount = local_offsets[rc+1] - local_offsets[rc];
#ifdef SUPPORT_UTF
- const pcre_uchar *p = start_subject + local_offsets[rc];
- const pcre_uchar *pp = start_subject + local_offsets[rc+1];
- while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+ if (utf)
+ {
+ const pcre_uchar *p = start_subject + local_offsets[rc];
+ const pcre_uchar *pp = start_subject + local_offsets[rc+1];
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+ }
#endif
if (charcount > 0)
{
const pcre_uchar *pp = local_ptr;
charcount = (int)(pp - p);
#ifdef SUPPORT_UTF
- while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+ if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
}
else
{
#ifdef SUPPORT_UTF
- const pcre_uchar *p = start_subject + local_offsets[0];
- const pcre_uchar *pp = start_subject + local_offsets[1];
- while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+ if (utf)
+ {
+ const pcre_uchar *p = start_subject + local_offsets[0];
+ const pcre_uchar *pp = start_subject + local_offsets[1];
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+ }
#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
if (repeat_state_offset >= 0)
if (new_count <= 0)
{
if (rlevel == 1 && /* Top level, and */
- could_continue && /* Some could go on */
+ could_continue && /* Some could go on, and */
forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
match_count < 0) /* no matches */
) && /* And... */
- ptr >= end_subject && /* Reached end of subject */
- ptr > md->start_used_ptr) /* Inspected non-empty string */
+ (
+ partial_newline || /* Either partial NL */
+ ( /* or ... */
+ ptr >= end_subject && /* End of subject and */
+ ptr > md->start_used_ptr) /* Inspected non-empty string */
+ )
+ )
{
if (offsetcount >= 2)
{
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
-/* We need to find the pointer to any study data before we test for byte
-flipping, so we scan the extra_data block first. This may set two fields in the
-match block, so we must initialize them beforehand. However, the other fields
-in the match block must not be set until after the byte flipping. */
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */
+
+if (re->magic_number != MAGIC_NUMBER)
+ return re->magic_number == REVERSED_MAGIC_NUMBER?
+ PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+
+/* If restarting after a partial match, do some sanity checks on the contents
+of the workspace. */
+
+if ((options & PCRE_DFA_RESTART) != 0)
+ {
+ if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
+ workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
+ return PCRE_ERROR_DFA_BADRESTART;
+ }
+
+/* Set up study, callout, and table data */
md->tables = re->tables;
md->callout_data = NULL;
md->tables = extra_data->tables;
}
-/* Check that the first field in the block is the magic number. If it is not,
-return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
-
-if (re->magic_number != MAGIC_NUMBER)
- return re->magic_number == REVERSED_MAGIC_NUMBER?
- PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
-if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
-
/* Set some local values */
current_subject = (const pcre_uchar *)subject + start_offset;
{
while (current_subject < end_subject)
{
- register unsigned int c = *current_subject;
+ unsigned int c = *current_subject;
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
{
- register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
+ PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */
{
while (p < end_subject)
{
- register int pp = *p++;
+ int pp = *p++;
if (pp == req_char || pp == req_char2) { p--; break; }
}
}