From f9176b44e50593d8f3446da63d3989558f6d4c20 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Sat, 18 May 2013 13:25:36 +0100 Subject: [PATCH] find_byclass, regrepeat: remove is_utf8_pat arg Remove the is_utf8_pat arg from these two static functions in regexec.c. Since both these functions are now passed a valid reginfo pointer, this info is already available as one of the fields in that struct. --- embed.fnc | 6 ++---- embed.h | 4 ++-- proto.h | 4 ++-- regexec.c | 35 ++++++++++++++++++----------------- 4 files changed, 24 insertions(+), 25 deletions(-) diff --git a/embed.fnc b/embed.fnc index ed32623..b8f85a4 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2072,8 +2072,7 @@ ERs |I32 |regrepeat |NN regexp *prog|NN char **startposp \ |NN const regnode *p \ |NN regmatch_info *const reginfo \ |I32 max \ - |int depth \ - |bool is_utf8_pat + |int depth ERs |I32 |regtry |NN regmatch_info *reginfo|NN char **startposp ERs |bool |reginclass |NULLOK regexp * const prog|NN const regnode * const n|NN const U8 * const p\ |bool const utf8_target @@ -2092,8 +2091,7 @@ ERsn |U8* |reghop4 |NN U8 *s|I32 off|NN const U8 *llim \ ERsn |U8* |reghopmaybe3 |NN U8 *s|I32 off|NN const U8 *lim ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c \ |NN char *s|NN const char *strend \ - |NULLOK regmatch_info *reginfo \ - |bool is_utf_pat + |NULLOK regmatch_info *reginfo Es |void |to_utf8_substr |NN regexp * prog Es |bool |to_byte_substr |NN regexp * prog ERs |I32 |reg_check_named_buff_matched |NN const regexp *rex \ diff --git a/embed.h b/embed.h index 850d4c3..ff43e10 100644 --- a/embed.h +++ b/embed.h @@ -976,7 +976,7 @@ # endif # if defined(PERL_IN_REGEXEC_C) #define core_regclass_swash(a,b,c,d) S_core_regclass_swash(aTHX_ a,b,c,d) -#define find_byclass(a,b,c,d,e,f) S_find_byclass(aTHX_ a,b,c,d,e,f) +#define find_byclass(a,b,c,d,e) S_find_byclass(aTHX_ a,b,c,d,e) #define isFOO_lc(a,b) S_isFOO_lc(aTHX_ a,b) #define isFOO_utf8_lc(a,b) S_isFOO_utf8_lc(aTHX_ a,b) #define reg_check_named_buff_matched(a,b) S_reg_check_named_buff_matched(aTHX_ a,b) @@ -986,7 +986,7 @@ #define reghopmaybe3 S_reghopmaybe3 #define reginclass(a,b,c,d) S_reginclass(aTHX_ a,b,c,d) #define regmatch(a,b,c) S_regmatch(aTHX_ a,b,c) -#define regrepeat(a,b,c,d,e,f,g) S_regrepeat(aTHX_ a,b,c,d,e,f,g) +#define regrepeat(a,b,c,d,e,f) S_regrepeat(aTHX_ a,b,c,d,e,f) #define regtry(a,b) S_regtry(aTHX_ a,b) #define to_byte_substr(a) S_to_byte_substr(aTHX_ a) #define to_utf8_substr(a) S_to_utf8_substr(aTHX_ a) diff --git a/proto.h b/proto.h index 714e412..806b56c 100644 --- a/proto.h +++ b/proto.h @@ -6891,7 +6891,7 @@ STATIC SV* S_core_regclass_swash(pTHX_ const regexp *prog, const struct regnode #define PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH \ assert(node) -STATIC char* S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo, bool is_utf_pat) +STATIC char* S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) @@ -6956,7 +6956,7 @@ STATIC I32 S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *pro #define PERL_ARGS_ASSERT_REGMATCH \ assert(reginfo); assert(startpos); assert(prog) -STATIC I32 S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, regmatch_info *const reginfo, I32 max, int depth, bool is_utf8_pat) +STATIC I32 S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, regmatch_info *const reginfo, I32 max, int depth) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) diff --git a/regexec.c b/regexec.c index e7c129b..0b73c72 100644 --- a/regexec.c +++ b/regexec.c @@ -653,6 +653,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos, strbeg = strpos; reginfo->strend = strend; + reginfo->is_utf8_pat = is_utf8_pat; reginfo->intuit = 1; if (utf8_target) { @@ -1129,7 +1130,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos, t = s; s = find_byclass(prog, progi->regstclass, checked_upto, endpos, - reginfo, is_utf8_pat); + reginfo); if (s) { checked_upto = s; } else { @@ -1437,7 +1438,7 @@ if ((reginfo->intuit || regtry(reginfo, &s))) \ STATIC char * S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, - const char *strend, regmatch_info *reginfo, bool is_utf8_pat) + const char *strend, regmatch_info *reginfo) { dVAR; const I32 doevery = (prog->intflags & PREGf_SKIP) == 0; @@ -1453,6 +1454,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, I32 tmp = 1; /* Scratch variable? */ const bool utf8_target = PL_reg_match_utf8; UV utf8_fold_flags = 0; + const bool is_utf8_pat = reginfo->is_utf8_pat; bool to_complement = FALSE; /* Invert the result? Taking the xor of this with a result inverts that result, as 0^1 = 1 and 1^1 = 0 */ @@ -2464,7 +2466,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, quoted, (int)(strend - s)); } }); - if (find_byclass(prog, c, s, strend, reginfo, reginfo->is_utf8_pat)) + if (find_byclass(prog, c, s, strend, reginfo)) goto got_it; DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n")); } @@ -5980,7 +5982,7 @@ NULL char *li = locinput; minmod = 0; if (ST.min && - regrepeat(rex, &li, ST.A, reginfo, ST.min, depth, is_utf8_pat) + regrepeat(rex, &li, ST.A, reginfo, ST.min, depth) < ST.min) sayNO; SET_locinput(li); @@ -6017,8 +6019,7 @@ NULL /* avoid taking address of locinput, so it can remain * a register var */ char *li = locinput; - ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max, depth, - is_utf8_pat); + ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max, depth); if (ST.count < ST.min) sayNO; SET_locinput(li); @@ -6102,7 +6103,7 @@ NULL * locinput matches */ char *li = ST.oldloc; ST.count += n; - if (regrepeat(rex, &li, ST.A, reginfo, n, depth, is_utf8_pat) < n) + if (regrepeat(rex, &li, ST.A, reginfo, n, depth) < n) sayNO; assert(n == REG_INFTY || locinput == li); } @@ -6126,7 +6127,7 @@ NULL /* failed -- move forward one */ { char *li = locinput; - if (!regrepeat(rex, &li, ST.A, reginfo, 1, depth, is_utf8_pat)) { + if (!regrepeat(rex, &li, ST.A, reginfo, 1, depth)) { sayNO; } locinput = li; @@ -6655,7 +6656,7 @@ no_silent: */ STATIC I32 S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, - regmatch_info *const reginfo, I32 max, int depth, bool is_utf8_pat) + regmatch_info *const reginfo, I32 max, int depth) { dVAR; char *scan; /* Pointer to current position in target string */ @@ -6734,7 +6735,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, } break; case EXACT: - assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1); + assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1); c = (U8)*STRING(p); @@ -6742,7 +6743,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, * under UTF-8, or both target and pattern aren't UTF-8. Note that we * can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's * true iff it doesn't matter if the argument is in UTF-8 or not */ - if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! is_utf8_pat)) { + if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! reginfo->is_utf8_pat)) { if (utf8_target && scan + max < loceol) { /* We didn't adjust because is UTF-8, but ok to do so, * since here, to match at all, 1 char == 1 byte */ @@ -6752,7 +6753,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, scan++; } } - else if (is_utf8_pat) { + else if (reginfo->is_utf8_pat) { if (utf8_target) { STRLEN scan_char_len; @@ -6814,25 +6815,25 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, case EXACTFU_SS: case EXACTFU_TRICKYFOLD: case EXACTFU: - utf8_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0; + utf8_flags = reginfo->is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0; do_exactf: { int c1, c2; U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1]; - assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1); + assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1); if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8, - is_utf8_pat)) + reginfo->is_utf8_pat)) { if (c1 == CHRTEST_VOID) { /* Use full Unicode fold matching */ char *tmpeol = reginfo->strend; - STRLEN pat_len = is_utf8_pat ? UTF8SKIP(STRING(p)) : 1; + STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1; while (hardcount < max && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target, STRING(p), NULL, pat_len, - is_utf8_pat, utf8_flags)) + reginfo->is_utf8_pat, utf8_flags)) { scan = tmpeol; tmpeol = reginfo->strend; -- 2.7.4