Fix and add tests for *PRUNE/*THEN plus leading non-greedy +
authorYves Orton <demerphq@gmail.com>
Sat, 22 Jun 2013 16:17:09 +0000 (18:17 +0200)
committerYves Orton <demerphq@gmail.com>
Sat, 22 Jun 2013 16:42:01 +0000 (18:42 +0200)
"aaabc" should match /a+?(*THEN)bc/ with "abc".

embed.h
proto.h
regcomp.c
regnodes.h
t/re/re_tests
t/re/regexp.t

diff --git a/embed.h b/embed.h
index 8637471..1d7000e 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define dumpuntil(a,b,c,d,e,f,g,h)     S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
 #define put_byte(a,b)          S_put_byte(aTHX_ a,b)
 #define regdump_extflags(a,b)  S_regdump_extflags(aTHX_ a,b)
-#define regdump_intflags(a,b)        S_regdump_intflags(aTHX_ a,b)
+#define regdump_intflags(a,b)  S_regdump_intflags(aTHX_ a,b)
 #define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d)
 #    endif
 #    if defined(PERL_IN_REGEXEC_C)
diff --git a/proto.h b/proto.h
index e7695a3..0630c37 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -5188,7 +5188,7 @@ STATIC void       S_put_byte(pTHX_ SV* sv, int c)
        assert(sv)
 
 STATIC void    S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
-STATIC void        S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
+STATIC void    S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
 STATIC U8      S_regtail_study(pTHX_ struct RExC_state_t *pRExC_state, regnode *p, const regnode *val, U32 depth)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2)
index 3cb7829..d7c5b00 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -5521,6 +5521,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     I32 sawlookahead = 0;
     I32 sawplus = 0;
     I32 sawopen = 0;
+    I32 sawminmod = 0;
+
     regex_charset initial_charset = get_regex_charset(orig_rx_flags);
     bool recompile = 0;
     bool runtime_code = 0;
@@ -6029,7 +6031,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     }
 
 reStudy:
-    r->minlen = minlen = sawlookahead = sawplus = sawopen = 0;
+    r->minlen = minlen = sawlookahead = sawplus = sawopen = sawminmod = 0;
     Zero(r->substrs, 1, struct reg_substr_data);
 
 #ifdef TRIE_STUDY_OPT
@@ -6098,12 +6100,15 @@ reStudy:
                 * the only op that could be a regnode is PLUS, all the rest
                 * will be regnode_1 or regnode_2.
                 *
+                 * (yves doesn't think this is true)
                 */
                if (OP(first) == PLUS)
                    sawplus = 1;
-               else
+                else {
+                    if (OP(first) == MINMOD)
+                        sawminmod = 1;
                    first += regarglen[OP(first)];
-
+                }
                first = NEXTOPER(first);
                first_next= regnext(first);
        }
@@ -6174,7 +6179,7 @@ reStudy:
            first = NEXTOPER(first);
            goto again;
        }
-       if (sawplus && !sawlookahead && (!sawopen || !RExC_sawback)
+        if (sawplus && !sawminmod && !sawlookahead && (!sawopen || !RExC_sawback)
            && !pRExC_state->num_code_blocks) /* May examine pos and $& */
            /* x+ must match at the 1st pos of run of x's */
            r->intflags |= PREGf_SKIP;
index d6c57e0..b302f5d 100644 (file)
@@ -692,12 +692,12 @@ EXTCONST char * const PL_reg_extflags_name[] = {
 EXTCONST char * PL_reg_intflags_name[];
 #else
 EXTCONST char * const PL_reg_intflags_name[] = {
-        "SKIP",                       /* 0x00000001 - PREGf_SKIP */
-        "IMPLICIT",                   /* 0x00000002 - PREGf_IMPLICIT -  Converted .* to ^.*  */
-        "NAUGHTY",                    /* 0x00000004 - PREGf_NAUGHTY -  how exponential is this pattern?  */
-        "VERBARG_SEEN",               /* 0x00000008 - PREGf_VERBARG_SEEN */
-        "CUTGROUP_SEEN",              /* 0x00000010 - PREGf_CUTGROUP_SEEN */
-        "USE_RE_EVAL",                /* 0x00000020 - PREGf_USE_RE_EVAL -  compiled with "use re 'eval'"  */
+       "SKIP",                       /* 0x00000001 - PREGf_SKIP */
+       "IMPLICIT",                   /* 0x00000002 - PREGf_IMPLICIT -  Converted .* to ^.*  */
+       "NAUGHTY",                    /* 0x00000004 - PREGf_NAUGHTY -  how exponential is this pattern?  */
+       "VERBARG_SEEN",               /* 0x00000008 - PREGf_VERBARG_SEEN */
+       "CUTGROUP_SEEN",              /* 0x00000010 - PREGf_CUTGROUP_SEEN */
+       "USE_RE_EVAL",                /* 0x00000020 - PREGf_USE_RE_EVAL -  compiled with "use re 'eval'"  */
 };
 #endif /* DOINIT */
 
index 0e7d8d3..b3231c2 100644 (file)
@@ -1754,4 +1754,12 @@ m?^xy\?$?        xy?     y       $&      xy?
 ((A(*PRUNE)B|A(*PRUNE)D|A(*PRUNE)C))   AC      n       -       -
 ((A(*PRUNE)B|A(*PRUNE)C|A(*PRUNE)D))   AC      n       -       -
 
+A+?(*THEN)BC   AAABC   y       $&      ABC
+A+?(*PRUNE)BC  AAABC   y       $&      ABC
+A+(*THEN)BC    AAABC   y       $&      AAABC
+A+(*PRUNE)BC   AAABC   y       $&      AAABC
+A+?(*THEN)BC(?{})      AAABC   y       $&      ABC
+A+?(*PRUNE)BC(?{})     AAABC   y       $&      ABC
+A+(*THEN)BC(?{})       AAABC   y       $&      AAABC
+A+(*PRUNE)BC(?{})      AAABC   y       $&      AAABC
 # vim: softtabstop=0 noexpandtab
index 21cae1d..151e8e5 100644 (file)
@@ -101,6 +101,9 @@ foreach (@tests) {
     chomp;
     s/\\n/\n/g unless $regex_sets;
     my ($pat, $subject, $result, $repl, $expect, $reason) = split(/\t/,$_,6);
+    if (!defined $subject) {
+        die "Bad test definition on line $test: $_\n";
+    }
     $reason = '' unless defined $reason;
     my $input = join(':',$pat,$subject,$result,$repl,$expect);
     # the double '' below keeps simple syntax highlighters from going crazy