From 1462525b8916fe18637f62742c02f7016eb23fab Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 17 Feb 2014 13:47:00 -0700 Subject: [PATCH] regexes: Remove uses of ANYOF_LOCALE flag This flag no longer adds any useful information and can be removed. An ANYOF node that depends on locale either matches a POSIX class like \d, or matches case insensitively, or both. There are flags for both these cases, and to see if something matches locale, one merely needs to see if either flag is set. Not having to keep track of this extra flag simplifies things, and will allow it to be removed. There was a time when this flag was shared with one of the remaining locale ones, and there was relict code that allowed that sharing to be reinstated, and which this commit also removes. --- regcomp.c | 31 ++++++++----------------------- regcomp.h | 6 ++---- regexec.c | 4 ++-- 3 files changed, 12 insertions(+), 29 deletions(-) diff --git a/regcomp.c b/regcomp.c index 252ccca..6382c78 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1452,13 +1452,11 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc) set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL, FALSE); - /* The code points that could match under /li are already incorporated into - * the inversion list and bit map */ - ANYOF_FLAGS(ssc) &= ~ANYOF_LOC_FOLD; - if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) { - ANYOF_FLAGS(ssc) |= ANYOF_LOCALE|ANYOF_POSIXL; + ANYOF_FLAGS(ssc) |= ANYOF_POSIXL; } + + assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE_FLAGS) || RExC_contains_locale); } #define TRIE_LIST_ITEM(state,idx) (trie->states[state].trans.list)[ idx ] @@ -4259,12 +4257,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } } if (OP(scan) == EXACTFL) { - if (flags & SCF_DO_STCLASS_AND) { - ssc_flags_and(data->start_class, ANYOF_LOCALE); - } - else if (flags & SCF_DO_STCLASS_OR) { - ANYOF_FLAGS(data->start_class) |= ANYOF_LOCALE; - } /* We don't know what the folds are; it could be anything. 
XXX * Actually, we only support UTF-8 encoding for code points @@ -13227,9 +13219,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, ANYOF_FLAGS(ret) = 0; RExC_emit += ANYOF_SKIP; - if (LOC) { - ANYOF_FLAGS(ret) |= ANYOF_LOCALE; - } listsv = newSVpvs_flags("# comment\n", SVs_TEMP); initial_listsv_len = SvCUR(listsv); SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated. */ @@ -13641,8 +13630,7 @@ parseit: /* What matches in a locale is not known until runtime. This includes * what the Posix classes (like \w, [:space:]) match. Room must be * reserved (one time per outer bracketed class) to store such classes, - * either if Perl is compiled so that locale nodes always should have - * this space, or if there is such posix class info to be stored. The + * if there is such posix class info to be stored. The * space will contain a bit for each named class that is to be matched * against. This isn't needed for \p{} and pseudo-classes, as they are * not affected by locale, and hence are dealt with separately */ @@ -13661,10 +13649,7 @@ parseit: * a posix class since are doing it here */ ANYOF_POSIXL_ZERO(ret); } - if (ANYOF_LOCALE == ANYOF_POSIXL - || (namedclass > OOB_NAMEDCLASS - && namedclass < ANYOF_POSIXL_MAX)) - { + if (namedclass > OOB_NAMEDCLASS && namedclass < ANYOF_POSIXL_MAX) { if (! need_class) { need_class = 1; if (SIZE_ONLY) { @@ -14627,7 +14612,7 @@ parseit: * invert if there are things such as \w, which aren't known until runtime * */ if (invert - && ! (ANYOF_FLAGS(ret) & (ANYOF_LOC_FOLD|ANYOF_POSIXL)) + && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS)) && ! depends_list && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION) { @@ -14675,7 +14660,7 @@ parseit: if (cp_list && ! invert && ! depends_list - && ! (ANYOF_FLAGS(ret) & (ANYOF_LOC_FOLD|ANYOF_POSIXL)) + && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS)) && ! 
HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION /* We don't optimize if we are supposed to make sure all non-Unicode @@ -15657,7 +15642,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) int do_sep = 0; - if (flags & ANYOF_LOCALE) + if (flags & ANYOF_LOCALE_FLAGS) sv_catpvs(sv, "{loc}"); if (flags & ANYOF_LOC_FOLD) sv_catpvs(sv, "{i}"); diff --git a/regcomp.h b/regcomp.h index af1a970..a7908be 100644 --- a/regcomp.h +++ b/regcomp.h @@ -422,14 +422,12 @@ struct regnode_ssc { #define ANYOF_FLAGS_ALL (0xff) -#define ANYOF_LOCALE_FLAGS (ANYOF_LOCALE \ - |ANYOF_LOC_FOLD \ - |ANYOF_POSIXL) +#define ANYOF_LOCALE_FLAGS (ANYOF_LOC_FOLD | ANYOF_POSIXL) /* These are the flags that apply to both regular ANYOF nodes and synthetic * start class nodes during construction of the SSC. During finalization of * the SSC, other of the flags could be added to it */ -#define ANYOF_COMMON_FLAGS (ANYOF_LOCALE_FLAGS | ANYOF_WARN_SUPER) +#define ANYOF_COMMON_FLAGS (ANYOF_WARN_SUPER) /* Character classes for node->classflags of ANYOF */ /* Should be synchronized with a table in regprop() */ diff --git a/regexec.c b/regexec.c index a2928ce..c31ae76 100644 --- a/regexec.c +++ b/regexec.c @@ -7652,7 +7652,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const { match = TRUE; } - else if (flags & ANYOF_LOCALE) { + else if (flags & ANYOF_LOCALE_FLAGS) { if (flags & ANYOF_LOC_FOLD) { RXp_MATCH_TAINTED_on(prog); if (ANYOF_BITMAP_TEST(n, PL_fold_locale[c])) { @@ -7736,7 +7736,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const && ((flags & ANYOF_NONBITMAP_NON_UTF8) || (utf8_target && (c >=256 - || (! (flags & ANYOF_LOCALE)) + || (! (flags & ANYOF_LOCALE_FLAGS)) || is_ANYOF_SYNTHETIC(n))))) { SV * const sw = core_regclass_swash(prog, n, TRUE, 0); -- 2.7.4