regcomp utf8 len cache panic
author: David Mitchell <davem@iabyn.com>
Tue, 10 Dec 2013 17:17:06 +0000 (17:17 +0000)
committer: David Mitchell <davem@iabyn.com>
Fri, 7 Feb 2014 22:39:35 +0000 (22:39 +0000)
Compiling this regex:

    /\x{100}[xy]\x{100}{2}/

caused this:

    panic: sv_len_utf8 cache 1 real 2

This was because the code in S_study_chunk() confused character lengths
with byte lengths when updating the utf8 length cache of a utf8 string
that had been extended by repeatedly duplicating its last n characters.

(The second test is for an issue introduced during an initial attempt to
fix this panic.)

regcomp.c
t/re/re_tests

index 499a366..cfc46b1 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4630,6 +4630,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    pars++;
                if (flags & SCF_DO_SUBSTR) {
                    SV *last_str = NULL;
+                    STRLEN last_chrs = 0;
                    int counted = mincount != 0;
 
                     if (data->last_end > 0 && mincount != 0) { /* Ends with a
@@ -4645,9 +4646,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        l -= old;
                        /* Get the added string: */
                        last_str = newSVpvn_utf8(s  + old, l, UTF);
+                        last_chrs = UTF ? utf8_length((U8*)(s + old),
+                                            (U8*)(s + old + l)) : l;
                        if (deltanext == 0 && pos_before == b) {
                            /* What was added is a constant string */
                            if (mincount > 1) {
+
                                SvGROW(last_str, (mincount * l) + 1);
                                repeatcpy(SvPVX(last_str) + l,
                                          SvPVX_const(last_str), l,
@@ -4663,8 +4667,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                        SvUTF8(sv) && SvMAGICAL(sv) ?
                                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
                                    if (mg && mg->mg_len >= 0)
-                                       mg->mg_len += CHR_SVLEN(last_str) - l;
+                                       mg->mg_len += last_chrs * (mincount-1);
                                }
+                                last_chrs *= mincount;
                                data->last_end += l * (mincount - 1);
                            }
                        } else {
@@ -4706,12 +4711,10 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                                mg->mg_len = -1;
                            sv_setsv(sv, last_str);
                            data->last_end = data->pos_min;
-                           data->last_start_min =
-                               data->pos_min - CHR_SVLEN(last_str);
+                           data->last_start_min = data->pos_min - last_chrs;
                            data->last_start_max = is_inf
                                ? SSize_t_MAX
-                               : data->pos_min + data->pos_delta
-                               - CHR_SVLEN(last_str);
+                               : data->pos_min + data->pos_delta - last_chrs;
                        }
                        data->longest = &(data->longest_float);
                    }
index d5c66f7..c6ff296 100644 (file)
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1848,5 +1848,9 @@ A+(*PRUNE)BC(?{}) AAABC   y       $&      AAABC
 /^\S+=/d       \x{3a3}=\x{3a0} y       $&      \x{3a3}=
 /^\S+=/u       \x{3a3}=\x{3a0} y       $&      \x{3a3}=
 
+# utf8 cache length panics
+\x{100}[xy]\x{100}{2}  \x{100}y\x{100}\x{100}  y       $&      \x{100}y\x{100}\x{100}
+\x{100}a{2,3}  \x{100}aaa      y       $&      \x{100}aaa
+
 # Keep these lines at the end of the file
 # vim: softtabstop=0 noexpandtab