From 779bcb7d68c0d77839c133a5b8429f43e63a961f Mon Sep 17 00:00:00 2001 From: Nick Cleaton Date: Mon, 29 Nov 2010 22:26:43 -0800 Subject: [PATCH] [perl #79152] super-linear cache can prevent a valid match The super-linear cache in regexec.c can prevent a valid match from being detected. For example: print "yay\n" if 'xayxay' =~ /(q1|.)*(q2|.)*(x(a|bc)*y){2,}/; This should match, but it doesn't because the cache fails to distinguish between matching the final xay to x(a|bc)*y as the first instance of the {2,} and matching it in the same position as the second instance. This seems to do the trick. --- regcomp.c | 17 ++++++++++------- t/re/re_tests | 5 +++++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/regcomp.c b/regcomp.c index 4092d79..4b69bf7 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3246,13 +3246,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, f |= SCF_DO_STCLASS_AND; f &= ~SCF_DO_STCLASS_OR; } - /* These are the cases when once a subexpression - fails at a particular position, it cannot succeed - even after backtracking at the enclosing scope. - - XXXX what if minimal match and we are at the - initial run of {n,m}? */ - if ((mincount != maxcount - 1) && (maxcount != REG_INFTY)) + /* Exclude from super-linear cache processing any {n,m} + regops for which the combination of input pos and regex + pos is not enough information to determine if a match + will be possible. + + For example, in the regex /foo(bar\s*){4,8}baz/ with the + regex pos at the \s*, the prospects for a match depend not + only on the input position but also on how many (bar\s*) + repeats into the {4,8} we are. */ + if ((mincount > 1) || (maxcount > 1 && maxcount != REG_INFTY)) f &= ~SCF_WHILEM_VISITED_POS; /* This will finish on WHILEM, setting scan, or on NULL: */ diff --git a/t/re/re_tests b/t/re/re_tests index 66a47cc..02da1e1 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1482,5 +1482,10 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer [\0005] 5\000 y $& 5 [\_] _ y $& _ +# RT #79152 +(q1|.)*(q2|.)*(x(a|bc)*y){2,} xayxay y $& xayxay +(q1|.)*(q2|.)*(x(a|bc)*y){2,3} xayxay y $& xayxay +(q1|z)*(q2|z)*z{15}-.*?(x(a|bc)*y){2,3}Z zzzzzzzzzzzzzzzz-xayxayxayxayZ y $& zzzzzzzzzzzzzzzz-xayxayxayxayZ + (?:(?:)foo|bar|zot|rt78356) foo y $& foo # vim: softtabstop=0 noexpandtab -- 2.7.4