From 8f10278a677a11d641ea4247a9d28b058d11c78a Mon Sep 17 00:00:00 2001
From: David Mitchell
Date: Tue, 18 Mar 2014 15:26:00 +0000
Subject: [PATCH] re_intuit_start(): don't set ml_anch on BOL

re_intuit_start() decided that a pattern was capable of being anchored
after *any* \n in the string for a //m pattern that contains a BOL
(rather than an MBOL). This can happen by embedding one regex in another
for example.

This is an incorrect assumption, and means that intuit() might try
against every \n position in the string rather than just trying at the
beginning.

With this commit, the following code on my machine reduces in
execution time from 7000ms to 5ms:

    my $r = qr/^abcd/;
    my $s = "abcd-xyz\n" x 500_000;
    $s =~ /$r\d{1,2}xyz/m for 1..200;
---
 regexec.c  | 3 +--
 t/re/pat.t | 7 ++++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/regexec.c b/regexec.c
index f606622..6311cb3 100644
--- a/regexec.c
+++ b/regexec.c
@@ -753,8 +753,7 @@ Perl_re_intuit_start(pTHX_
 
     if (prog->intflags & PREGf_ANCH) { /* Match at \G, beg-of-str or after \n */
         /* Check after \n? */
-        ml_anch = ( (prog->intflags & PREGf_ANCH_MBOL)
-                    || ((prog->intflags & PREGf_ANCH_BOL) && multiline));
+        ml_anch = (prog->intflags & PREGf_ANCH_MBOL);
 
         if (!ml_anch) {
             /* we are only allowed to match at BOS or \G */
diff --git a/t/re/pat.t b/t/re/pat.t
index 79c7e6a..472141d 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -20,7 +20,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan tests => 717; # Update this when adding/deleting tests.
+plan tests => 718; # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1538,6 +1538,11 @@ EOP
         $s .= "abx";
         ok($s =~ /^ab.*x/m, "distant float with /m");
 
+        my $r = qr/^abcd/;
+        $s = "abcd-xyz\n" x 500_000;
+        $s =~ /$r\d{1,2}xyz/m for 1..200;
+        pass("BOL within //m mustn't run slowly");
+
     }
 
     # These are based on looking at the code in regcomp.c
-- 
2.7.4