From 8951c461a5079d86be33a432491eda98c24dc397 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 9 Nov 2010 22:05:44 -0700 Subject: [PATCH] PATCH: [perl #78994]: optimizer loses folding The optimizer works by creating an initial ANYOF node that is supposed to match anything. When it finds the first node that it wants to allow to be a first match, it ANDs the anything node with that one, effectively setting the anything node to that first node. Subsequent nodes that could match in the first position are OR'd instead, adding to the possibilities for what the first match could be. But the optimizer was not dealing with the possibility of folding, so the first node did not have that as a possibility and hence a match that relied on the fold would not succeed. Originally, the patch had an 'if (FOLD)' guarding the initialization of the match-anything node, but it turns out that FOLD is set only if folding is on globally, and in the example program of the trouble ticket, it isn't set at the proper time, because the regex is recompiled when upgraded to utf8, and it doesn't get set then. So I unconditionally set ANYOF_FOLD. 
--- regcomp.c | 7 +++++++ t/re/reg_fold.t | 2 ++ 2 files changed, 9 insertions(+) diff --git a/regcomp.c b/regcomp.c index 852906a..7c7f526 100644 --- a/regcomp.c +++ b/regcomp.c @@ -710,6 +710,7 @@ S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *c cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL; if (LOC) cl->flags |= ANYOF_LOCALE; + cl->flags |= ANYOF_FOLD; } /* Can match anything (initialization) */ @@ -779,6 +780,9 @@ S_cl_and(struct regnode_charclass_class *cl, if (!(and_with->flags & ANYOF_EOS)) cl->flags &= ~ANYOF_EOS; + if (!(and_with->flags & ANYOF_FOLD)) + cl->flags &= ~ANYOF_FOLD; + if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE && !(and_with->flags & ANYOF_INVERT)) { cl->flags &= ~ANYOF_UNICODE_ALL; @@ -844,6 +848,9 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con if (or_with->flags & ANYOF_EOS) cl->flags |= ANYOF_EOS; + if (or_with->flags & ANYOF_FOLD) + cl->flags |= ANYOF_FOLD; + if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE && ARG(cl) != ARG(or_with)) { cl->flags |= ANYOF_UNICODE_ALL; diff --git a/t/re/reg_fold.t b/t/re/reg_fold.t index af5ba28..a9f3bbd 100644 --- a/t/re/reg_fold.t +++ b/t/re/reg_fold.t @@ -77,6 +77,8 @@ push @tests, qq[like chr(0x0430), qr/[=\x{0410}-\x{0411}]/i, 'Bug #71752 Unicode $count++; push @tests, qq[like 'a', qr/\\p{Upper}/i, "'a' =~ /\\\\p{Upper}/i"]; $count++; +push @tests, q[my $c = "\x{212A}"; my $p = qr/(?:^[\x{004B}_]+$)/i; utf8::upgrade($p); like $c, $p, 'Bug #78994: my $c = "\x{212A}"; my $p = qr/(?:^[\x{004B}_]+$)/i; utf8::upgrade($p); $c =~ $p']; +$count++; eval join ";\n","plan tests=>".($count-1),@tests,"1" or die $@; -- 2.7.4