From 635cd5d4249dc12aee8715be32cb34c2f327a56f Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 23 Oct 2012 21:25:30 -0600 Subject: [PATCH] regexec.c: Remove dead code An ANYOF node now no longer matches more than one character, since 9d53c4576e551530162e7cd79ab72ed81b1e1a0f. This code was overlooked in the clean up commit e0193e472b025d41438e251be622aad42c9af9cc. Since the maximum match is 1 character, there is no point in passing a ptr that was set to indicate how far the match went, so that parameter is removed. --- embed.fnc | 4 ++-- embed.h | 2 +- proto.h | 2 +- regexec.c | 43 +++++++++---------------------------------- 4 files changed, 13 insertions(+), 38 deletions(-) diff --git a/embed.fnc b/embed.fnc index b85173d..b7a8431 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2006,8 +2006,8 @@ Es |U8 |regtail_study |NN struct RExC_state_t *pRExC_state \ ERs |I32 |regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog ERs |I32 |regrepeat |NN const regexp *prog|NN char **startposp|NN const regnode *p|I32 max|int depth ERs |I32 |regtry |NN regmatch_info *reginfo|NN char **startposp -ERs |bool |reginclass |NULLOK const regexp * const prog|NN const regnode * const n|NN const U8 * const p|NULLOK STRLEN *lenp\ - |bool const do_utf8sv_is_utf8 +ERs |bool |reginclass |NULLOK const regexp * const prog|NN const regnode * const n|NN const U8 * const p\ + |bool const utf8_target Es |CHECKPOINT|regcppush |NN const regexp *rex|I32 parenfloor Es |void |regcppop |NN regexp *rex ERsn |U8* |reghop3 |NN U8 *s|I32 off|NN const U8 *lim diff --git a/embed.h b/embed.h index 0b54ebb..b21078c 100644 --- a/embed.h +++ b/embed.h @@ -967,7 +967,7 @@ #define regcppush(a,b) S_regcppush(aTHX_ a,b) #define reghop3 S_reghop3 #define reghopmaybe3 S_reghopmaybe3 -#define reginclass(a,b,c,d,e) S_reginclass(aTHX_ a,b,c,d,e) +#define reginclass(a,b,c,d) S_reginclass(aTHX_ a,b,c,d) #define regmatch(a,b,c) S_regmatch(aTHX_ a,b,c) #define regrepeat(a,b,c,d,e) S_regrepeat(aTHX_ a,b,c,d,e) #define regtry(a,b) S_regtry(aTHX_ a,b) diff --git a/proto.h b/proto.h index c6b05b7..d756e1c 100644 --- a/proto.h +++ b/proto.h @@ -6786,7 +6786,7 @@ STATIC U8* S_reghopmaybe3(U8 *s, I32 off, const U8 *lim) #define PERL_ARGS_ASSERT_REGHOPMAYBE3 \ assert(s); assert(lim) -STATIC bool S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8 * const p, STRLEN *lenp, bool const do_utf8sv_is_utf8) +STATIC bool S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8 * const p, bool const utf8_target) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); diff --git a/regexec.c b/regexec.c index 068c67c..ce2861f 100644 --- a/regexec.c +++ b/regexec.c @@ -104,7 +104,7 @@ const char* const non_utf8_target_but_utf8_required /* Valid for non-utf8 strings: avoids the reginclass * call if there are no complications: i.e., if everything matchable is * straight forward in the bitmap */ -#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) \ +#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0) \ : ANYOF_BITMAP_TEST(p,*(c))) /* @@ -1458,9 +1458,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, switch (OP(c)) { case ANYOF: if (utf8_target) { - STRLEN inclasslen = strend - s; REXEC_FBC_UTF8_CLASS_SCAN( - reginclass(prog, c, (U8*)s, &inclasslen, utf8_target)); + reginclass(prog, c, (U8*)s, utf8_target)); } else { REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s)); @@ -4315,10 +4314,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) if (NEXTCHR_IS_EOS) sayNO; if (utf8_target) { - STRLEN inclasslen = PL_regeol - locinput; - if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, utf8_target)) + if (!reginclass(rex, scan, (U8*)locinput, utf8_target)) sayNO; - locinput += inclasslen; + locinput += UTF8SKIP(locinput); break; } else { @@ -6763,10 +6761,9 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma case ANYOF: if (utf8_target) { STRLEN inclasslen; - inclasslen = loceol - scan; while (hardcount < max - && ((inclasslen = loceol - scan) > 0) - && reginclass(prog, p, (U8*)scan, &inclasslen, utf8_target)) + && scan + (inclasslen = UTF8SKIP(scan)) <= loceol + && reginclass(prog, p, (U8*)scan, utf8_target)) { scan += inclasslen; hardcount++; @@ -7321,15 +7318,9 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo n is the ANYOF regnode p is the target string - lenp is pointer to the maximum number of bytes of how far to go in p - (This is assumed wthout checking to always be at least the current - character's size) utf8_target tells whether p is in UTF-8. - Returns true if matched; false otherwise. If lenp is not NULL, on return - from a successful match, the value it points to will be updated to how many - bytes in p were matched. If there was no match, the value is undefined, - possibly changed from the input. + Returns true if matched; false otherwise. Note that this can be a synthetic start class, a combination of various nodes, so things you think might be mutually exclusive, such as locale, @@ -7338,19 +7329,18 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo */ STATIC bool -S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, STRLEN* lenp, register const bool utf8_target) +S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, register const bool utf8_target) { dVAR; const char flags = ANYOF_FLAGS(n); bool match = FALSE; UV c = *p; - STRLEN c_len = 0; - STRLEN maxlen; PERL_ARGS_ASSERT_REGINCLASS; /* If c is not already the code point, get it */ if (utf8_target && !UTF8_IS_INVARIANT(c)) { + STRLEN c_len = 0; c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len, (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_ALLOW_FFFF | UTF8_CHECK_ONLY); @@ -7359,21 +7349,6 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, if (c_len == (STRLEN)-1) Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)"); } - else { - c_len = 1; - } - - /* Use passed in max length, or one character if none passed in or less - * than one character. And assume will match just one character. This is - * overwritten later if matched more. */ - if (lenp) { - maxlen = (*lenp > c_len) ? *lenp : c_len; - *lenp = c_len; - - } - else { - maxlen = c_len; - } /* If this character is potentially in the bitmap, check it */ if (c < 256) { -- 2.7.4