Make 'OR's logically 'or'
author Karl Williamson <public@khwilliamson.com>
Tue, 24 Sep 2013 03:40:05 +0000 (21:40 -0600)
committer Karl Williamson <public@khwilliamson.com>
Tue, 24 Sep 2013 17:36:20 +0000 (11:36 -0600)
In several OR operations, the regex optimizer prevented the synthetic start
class (SSC) node from matching an empty string.  This doesn't seem
right.  The end result of an OR should be the same size or larger than
either of its inputs.  I talked it over with Yves Orton, and we decided
that even though we don't have test cases that demonstrate problems with
the current behavior, we should do the logical thing.  This change
should not cause code failures, as it only increases the set of things that
the SSC can match, though some of these may be false positives.

regcomp.c

index 774517a..7466f5d 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3926,7 +3926,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 ssc_clear_locale(data->start_class);
            }
            else if (flags & SCF_DO_STCLASS_OR) {
-                CLEAR_SSC_EOS(data->start_class);
                 ssc_add_cp(data->start_class, uc);
                ssc_and(pRExC_state, data->start_class, and_withp);
            }
@@ -4046,7 +4045,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
            }
            else if (flags & SCF_DO_STCLASS_OR) {
-                CLEAR_SSC_EOS(data->start_class);
                 ssc_union(data->start_class, EXACTF_invlist, FALSE);
                ssc_and(pRExC_state, data->start_class, and_withp);
            }
@@ -4454,7 +4452,6 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                               PL_XPosix_ptrs[_CC_VERTSPACE],
                               FALSE);
                    ssc_and(pRExC_state, data->start_class, and_withp);
-                    CLEAR_SSC_EOS(data->start_class); /* No match on empty */
                 }
                flags &= ~SCF_DO_STCLASS;
             }
@@ -4480,7 +4477,9 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                 U8 classnum;
                 U8 namedclass;
 
-                CLEAR_SSC_EOS(data->start_class); /* No match on empty */
+                if (flags & SCF_DO_STCLASS_AND) {
+                    CLEAR_SSC_EOS(data->start_class); /* No match on empty */
+                }
 
                /* Some of the logic below assumes that switching
                   locale on will only add false positives. */