#define regcppush(a,b,c) S_regcppush(aTHX_ a,b,c)
#define reghop3 S_reghop3
#define reghopmaybe3 S_reghopmaybe3
-#define reginclass(a,b,c,d) S_reginclass(aTHX_ a,b,c,d)
+#define reginclass(a,b,c,d,e) S_reginclass(aTHX_ a,b,c,d,e)
#define regmatch(a,b,c) S_regmatch(aTHX_ a,b,c)
#define regrepeat(a,b,c,d,e,f) S_regrepeat(aTHX_ a,b,c,d,e,f)
#define regtry(a,b) S_regtry(aTHX_ a,b)
#define PERL_ARGS_ASSERT_REGHOPMAYBE3 \
assert(s); assert(lim)
-STATIC bool S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8 * const p, bool const utf8_target)
+STATIC bool S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8 * const p, const U8 * const p_end, bool const utf8_target)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_2)
- __attribute__nonnull__(pTHX_3);
+ __attribute__nonnull__(pTHX_3)
+ __attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT_REGINCLASS \
- assert(n); assert(p)
+ assert(n); assert(p); assert(p_end)
STATIC SSize_t S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
__attribute__warn_unused_result__
/* Valid for non-utf8 strings: avoids the reginclass
* call if there are no complications: i.e., if everything matchable is
* straight forward in the bitmap */
-#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0) \
+#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,(c)+1,0) \
: ANYOF_BITMAP_TEST(p,*(c)))
/*
} \
else { \
U8 * const r = reghop3((U8*)s, -1, (U8*)reginfo->strbeg); \
- tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT); \
+ tmp = utf8n_to_uvchr(r, (U8*) reginfo->strend - r, \
+ 0, UTF8_ALLOW_DEFAULT); \
} \
tmp = TeSt1_UtF8; \
- LOAD_UTF8_CHARCLASS_ALNUM(); \
+ LOAD_UTF8_CHARCLASS_ALNUM(); \
REXEC_FBC_UTF8_SCAN( \
- if (tmp == ! (TeSt2_UtF8)) { \
+ if (tmp == ! (TeSt2_UtF8)) { \
tmp = !tmp; \
IF_SUCCESS; \
} \
case ANYOF_SYNTHETIC:
if (utf8_target) {
REXEC_FBC_UTF8_CLASS_SCAN(
- reginclass(prog, c, (U8*)s, utf8_target));
+ reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
}
else {
REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
const U8 * const r =
reghop3((U8*)locinput, -1, (U8*)(reginfo->strbeg));
- ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
+ ln = utf8n_to_uvchr(r, (U8*) reginfo->strend - r,
+ 0, uniflags);
}
if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
ln = isWORDCHAR_uni(ln);
if (NEXTCHR_IS_EOS)
sayNO;
if (utf8_target) {
- if (!reginclass(rex, scan, (U8*)locinput, utf8_target))
+ if (!reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
+ utf8_target))
sayNO;
locinput += UTF8SKIP(locinput);
}
if (utf8_target) {
while (hardcount < max
&& scan < loceol
- && reginclass(prog, p, (U8*)scan, utf8_target))
+ && reginclass(prog, p, (U8*)scan, (U8*) loceol, utf8_target))
{
scan += UTF8SKIP(scan);
hardcount++;
n is the ANYOF regnode
p is the target string
+ p_end points to one byte beyond the end of the target string
utf8_target tells whether p is in UTF-8.
Returns true if matched; false otherwise.
*/
STATIC bool
-S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target)
+S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
{
dVAR;
const char flags = ANYOF_FLAGS(n);
* UTF8_IS_INVARIANT() works even if not in UTF-8 */
if (! UTF8_IS_INVARIANT(c) && utf8_target) {
STRLEN c_len = 0;
- c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
+ c = utf8n_to_uvchr(p, p_end - p, &c_len,
(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
| UTF8_ALLOW_FFFF | UTF8_CHECK_ONLY);
/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for