From 272d35c920ff70f3a216eedf88a0152190a41867 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Tue, 25 Dec 2012 20:51:50 +0000 Subject: [PATCH] Eliminate RF_tainted flag from PL_reg_flags This global flag is cleared at the start of execution, and then set if any locale-based nodes are executed. At the end of execution, the RXf_TAINTED_SEEN flag on the regex is set/cleared based on RF_tainted. We eliminate RF_tainted by simply directly setting RXf_TAINTED_SEEN each time a taintable node is executed. This is the final step before eliminating PL_reg_flags. --- embed.fnc | 4 ++-- pp_hot.c | 9 ++++----- proto.h | 4 ++-- regexec.c | 31 ++++++++++++++----------------- regexp.h | 14 ++++++++++---- 5 files changed, 32 insertions(+), 30 deletions(-) diff --git a/embed.fnc b/embed.fnc index 97e16ba..a2c77b2 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2032,11 +2032,11 @@ Es |U8 |regtail_study |NN struct RExC_state_t *pRExC_state \ ERs |bool |isFOO_lc |const U8 classnum|const U8 character ERs |bool |isFOO_utf8_lc |const U8 classnum|NN const U8* character ERs |I32 |regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog -ERs |I32 |regrepeat |NN const regexp *prog|NN char **startposp \ +ERs |I32 |regrepeat |NN regexp *prog|NN char **startposp \ |NN const regnode *p|I32 max|int depth \ |bool is_utf8_pat ERs |I32 |regtry |NN regmatch_info *reginfo|NN char **startposp -ERs |bool |reginclass |NULLOK const regexp * const prog|NN const regnode * const n|NN const U8 * const p\ +ERs |bool |reginclass |NULLOK regexp * const prog|NN const regnode * const n|NN const U8 * const p\ |bool const utf8_target Es |CHECKPOINT|regcppush |NN const regexp *rex|I32 parenfloor\ |U32 maxopenparen diff --git a/pp_hot.c b/pp_hot.c index 33f8669..be634a3 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -2084,12 +2084,11 @@ the pattern is marked as tainted. This means that subsequent usage, such as /x$r/, will set PL_tainted using TAINT_set, and thus RXf_TAINTED, on the new pattern too. -During execution of a pattern, locale-variant ops such as ALNUML set the -local flag RF_tainted. At the end of execution, the engine sets the -RXf_TAINTED_SEEN on the pattern if RF_tainted got set, or clears it -otherwise. +At the start of execution of a pattern, the RXf_TAINTED_SEEN flag on the +regex is cleared; during execution, locale-variant ops such as ALNUML may +set RXf_TAINTED_SEEN. -In addition, RXf_TAINTED_SEEN is used post-execution by the get magic code +RXf_TAINTED_SEEN is used post-execution by the get magic code of $1 et al to indicate whether the returned value should be tainted. It is the responsibility of the caller of the pattern (i.e. pp_match, pp_subst etc) to set this flag for any other circumstances where $1 needs diff --git a/proto.h b/proto.h index a962d32..1151167 100644 --- a/proto.h +++ b/proto.h @@ -6884,7 +6884,7 @@ STATIC U8* S_reghopmaybe3(U8 *s, I32 off, const U8 *lim) #define PERL_ARGS_ASSERT_REGHOPMAYBE3 \ assert(s); assert(lim) -STATIC bool S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8 * const p, bool const utf8_target) +STATIC bool S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8 * const p, bool const utf8_target) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); @@ -6899,7 +6899,7 @@ STATIC I32 S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *pro #define PERL_ARGS_ASSERT_REGMATCH \ assert(reginfo); assert(startpos); assert(prog) -STATIC I32 S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth, bool is_utf8_pat) +STATIC I32 S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, I32 max, int depth, bool is_utf8_pat) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) diff --git a/regexec.c b/regexec.c index 104e1d2..02d2b19 100644 --- a/regexec.c +++ b/regexec.c @@ -93,8 +93,6 @@ static const char* const non_utf8_target_but_utf8_required #include "inline_invlist.c" #include "unicode_constants.h" -#define RF_tainted 1 /* tainted information used? e.g. locale */ - #define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i) #ifndef STATIC @@ -1616,13 +1614,13 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, break; } case BOUNDL: - PL_reg_flags |= RF_tainted; + RXp_MATCH_TAINTED_on(prog); FBC_BOUND(isALNUM_LC, isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)), isALNUM_LC_utf8((U8*)s)); break; case NBOUNDL: - PL_reg_flags |= RF_tainted; + RXp_MATCH_TAINTED_on(prog); FBC_NBOUND(isALNUM_LC, isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)), isALNUM_LC_utf8((U8*)s)); @@ -1671,7 +1669,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, /* FALLTHROUGH */ case POSIXL: - PL_reg_flags |= RF_tainted; + RXp_MATCH_TAINTED_on(prog); REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)), to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s))); break; @@ -2104,7 +2102,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, Perl_croak(aTHX_ "corrupted regexp program"); } - PL_reg_flags = 0; + RX_MATCH_TAINTED_off(rx); PL_reg_state.re_state_eval_setup_done = FALSE; PL_reg_maxiter = 0; @@ -2590,7 +2588,6 @@ got_it: ); ); Safefree(swap); - RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted); if (PL_reg_state.re_state_eval_setup_done) restore_pos(aTHX_ prog); @@ -4125,7 +4122,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) const char * s; U32 fold_utf8_flags; - PL_reg_flags |= RF_tainted; + RX_MATCH_TAINTED_on(reginfo->prog); folder = foldEQ_locale; fold_array = PL_fold_locale; fold_utf8_flags = FOLDEQ_UTF8_LOCALE; @@ -4189,7 +4186,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) * have to set the FLAGS fields of these */ case BOUNDL: /* /\b/l */ case NBOUNDL: /* /\B/l */ - PL_reg_flags |= RF_tainted; + RX_MATCH_TAINTED_on(reginfo->prog); /* FALL THROUGH */ case BOUND: /* /\b/ */ case BOUNDU: /* /\b/u */ @@ -4296,7 +4293,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) /* The locale hasn't influenced the outcome before this, so defer * tainting until now */ - PL_reg_flags |= RF_tainted; + RX_MATCH_TAINTED_on(reginfo->prog); /* Use isFOO_lc() for characters within Latin1. (Note that * UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else @@ -4670,7 +4667,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) const U8 *fold_array; UV utf8_fold_flags; - PL_reg_flags |= RF_tainted; + RX_MATCH_TAINTED_on(reginfo->prog); folder = foldEQ_locale; fold_array = PL_fold_locale; type = REFFL; @@ -4715,7 +4712,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) goto do_nref_ref_common; case REFFL: /* /\1/il */ - PL_reg_flags |= RF_tainted; + RX_MATCH_TAINTED_on(reginfo->prog); folder = foldEQ_locale; fold_array = PL_fold_locale; utf8_fold_flags = FOLDEQ_UTF8_LOCALE; @@ -6639,7 +6636,7 @@ no_silent: * depth - (for debugging) backtracking depth. */ STATIC I32 -S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, +S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, I32 max, int depth, bool is_utf8_pat) { dVAR; @@ -6788,7 +6785,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, goto do_exactf; case EXACTFL: - PL_reg_flags |= RF_tainted; + RXp_MATCH_TAINTED_on(prog); utf8_flags = FOLDEQ_UTF8_LOCALE; goto do_exactf; @@ -6882,7 +6879,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, /* FALLTHROUGH */ case POSIXL: - PL_reg_flags |= RF_tainted; + RXp_MATCH_TAINTED_on(prog); if (! utf8_target) { while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p), *scan))) @@ -7269,7 +7266,7 @@ S_core_regclass_swash(pTHX_ const regexp *prog, const regnode* node, bool doinit */ STATIC bool -S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target) +S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target) { dVAR; const char flags = ANYOF_FLAGS(n); @@ -7302,7 +7299,7 @@ S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8* match = TRUE; } else if (flags & ANYOF_LOCALE) { - PL_reg_flags |= RF_tainted; + RXp_MATCH_TAINTED_on(prog); if ((flags & ANYOF_LOC_FOLD) && ANYOF_BITMAP_TEST(n, PL_fold_locale[c])) diff --git a/regexp.h b/regexp.h index 3863308..2c7eb87 100644 --- a/regexp.h +++ b/regexp.h @@ -438,16 +438,22 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) #if NO_TAINT_SUPPORT # define RX_ISTAINTED(prog) 0 # define RX_TAINT_on(prog) NOOP +# define RXp_MATCH_TAINTED(prog) 0 +# define RX_MATCH_TAINTED(prog) 0 +# define RXp_MATCH_TAINTED_on(prog) NOOP +# define RX_MATCH_TAINTED_on(prog) NOOP +# define RX_MATCH_TAINTED_off(prog) NOOP #else # define RX_ISTAINTED(prog) (RX_EXTFLAGS(prog) & RXf_TAINTED) # define RX_TAINT_on(prog) (RX_EXTFLAGS(prog) |= RXf_TAINTED) +# define RXp_MATCH_TAINTED(prog) (RXp_EXTFLAGS(prog) & RXf_TAINTED_SEEN) +# define RX_MATCH_TAINTED(prog) (RX_EXTFLAGS(prog) & RXf_TAINTED_SEEN) +# define RXp_MATCH_TAINTED_on(prog) (RXp_EXTFLAGS(prog) |= RXf_TAINTED_SEEN) +# define RX_MATCH_TAINTED_on(prog) (RX_EXTFLAGS(prog) |= RXf_TAINTED_SEEN) +# define RX_MATCH_TAINTED_off(prog) (RX_EXTFLAGS(prog) &= ~RXf_TAINTED_SEEN) #endif #define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN) -#define RXp_MATCH_TAINTED(prog) (RXp_EXTFLAGS(prog) & RXf_TAINTED_SEEN) -#define RX_MATCH_TAINTED(prog) (RX_EXTFLAGS(prog) & RXf_TAINTED_SEEN) -#define RX_MATCH_TAINTED_on(prog) (RX_EXTFLAGS(prog) |= RXf_TAINTED_SEEN) -#define RX_MATCH_TAINTED_off(prog) (RX_EXTFLAGS(prog) &= ~RXf_TAINTED_SEEN) #define RX_MATCH_TAINTED_set(prog, t) ((t) \ ? RX_MATCH_TAINTED_on(prog) \ : RX_MATCH_TAINTED_off(prog)) -- 2.7.4