From ea3daa5d5f60389b105c00d4f7d05b5c2f2155f2 Mon Sep 17 00:00:00 2001 From: Father Chrysostomos Date: Sun, 18 Aug 2013 14:03:06 -0700 Subject: [PATCH] Use SSize_t/STRLEN in more places in regexp code MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit As part of getting the regexp engine to handle long strings, this commit changes any variables, parameters and struct members that hold lengths of the string being matched against (or parts thereof) to use SSize_t or STRLEN instead of [IU]32. To avoid having to change any logic, I kept the signedness the same. I did not change anything that affects the length of the regular expression itself, so regexps are still practically limited to I32_MAX. Changing that would involve changing the size of regnodes, which would be a lot more involved. These changes should fix bugs, but are very hard to test. In most cases, I don’t know the regexp engine well enough to come up with test cases that test the paths in question with long strings. In other cases I don’t have a box with enough memory to test the fix. --- embed.fnc | 21 +++++---- pod/perlreapi.pod | 4 +- proto.h | 14 +++--- regcomp.c | 137 ++++++++++++++++++++++++++++++------------------------ regexec.c | 49 +++++++++---------- regexp.h | 20 ++++---- 6 files changed, 131 insertions(+), 114 deletions(-) diff --git a/embed.fnc b/embed.fnc index ecf3e23..d223f0d 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1096,8 +1096,8 @@ EMsR |SV* |_new_invlist_C_array|NN const UV* const list : Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b #endif Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \ - |NN char* strend|NN char* strbeg|I32 minend \ - |NN SV* screamer|U32 nosave + |NN char* strend|NN char* strbeg \ + |SSize_t minend |NN SV* screamer|U32 nosave Ap |void |pregfree |NULLOK REGEXP* r Ap |void |pregfree2 |NN REGEXP *rx : FIXME - is anything in re using this now? 
@@ -1128,8 +1128,9 @@ EiPR |I32 |regcurly |NN const char *s \ |const bool rbrace_must_be_escaped #endif Ap |I32 |regexec_flags |NN REGEXP *const rx|NN char *stringarg \ - |NN char *strend|NN char *strbeg|I32 minend \ - |NN SV *sv|NULLOK void *data|U32 flags + |NN char *strend|NN char *strbeg \ + |SSize_t minend|NN SV *sv \ + |NULLOK void *data|U32 flags ApR |regnode*|regnext |NULLOK regnode* p EXp |SV*|reg_named_buff |NN REGEXP * const rx|NULLOK SV * const key \ |NULLOK SV * const value|const U32 flags @@ -2033,8 +2034,8 @@ Ei |U8 |compute_EXACTish|NN struct RExC_state_t *pRExC_state Es |char * |nextchar |NN struct RExC_state_t *pRExC_state Es |bool |reg_skipcomment|NN struct RExC_state_t *pRExC_state Es |void |scan_commit |NN const struct RExC_state_t *pRExC_state \ - |NN struct scan_data_t *data|NN I32 *minlenp \ - |int is_inf + |NN struct scan_data_t *data \ + |NN SSize_t *minlenp|int is_inf Esn |void |cl_anything |NN const struct RExC_state_t *pRExC_state \ |NN struct regnode_charclass_class *cl EsRn |int |cl_is_anything |NN const struct regnode_charclass_class *cl @@ -2046,7 +2047,7 @@ Esn |void |cl_or |NN const struct RExC_state_t *pRExC_state \ |NN struct regnode_charclass_class *cl \ |NN const struct regnode_charclass_class *or_with Es |SSize_t|study_chunk |NN struct RExC_state_t *pRExC_state \ - |NN regnode **scanp|NN I32 *minlenp \ + |NN regnode **scanp|NN SSize_t *minlenp \ |NN SSize_t *deltap|NN regnode *last \ |NULLOK struct scan_data_t *data \ |I32 stopparen|NULLOK U8* recursed \ @@ -2104,15 +2105,15 @@ Es |CHECKPOINT|regcppush |NN const regexp *rex|I32 parenfloor\ |U32 maxopenparen Es |void |regcppop |NN regexp *rex\ |NN U32 *maxopenparen_p -ERsn |U8* |reghop3 |NN U8 *s|I32 off|NN const U8 *lim +ERsn |U8* |reghop3 |NN U8 *s|SSize_t off|NN const U8 *lim ERsM |SV* |core_regclass_swash|NULLOK const regexp *prog \ |NN const struct regnode *node|bool doinit \ |NULLOK SV **listsvp #ifdef XXX_dmq -ERsn |U8* |reghop4 |NN U8 *s|I32 off|NN const U8 *llim \ 
+ERsn |U8* |reghop4 |NN U8 *s|SSize_t off|NN const U8 *llim \ |NN const U8 *rlim #endif -ERsn |U8* |reghopmaybe3 |NN U8 *s|I32 off|NN const U8 *lim +ERsn |U8* |reghopmaybe3 |NN U8 *s|SSize_t off|NN const U8 *lim ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c \ |NN char *s|NN const char *strend \ |NULLOK regmatch_info *reginfo diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod index 659088e..cfc41d7 100644 --- a/pod/perlreapi.pod +++ b/pod/perlreapi.pod @@ -17,7 +17,7 @@ following format: REGEXP * const rx, char* stringarg, char* strend, char* strbeg, - I32 minend, SV* sv, + SSize_t minend, SV* sv, void* data, U32 flags); char* (*intuit) (pTHX_ REGEXP * const rx, SV *sv, @@ -238,7 +238,7 @@ certain optimisations when this is set. I32 exec(pTHX_ REGEXP * const rx, char *stringarg, char* strend, char* strbeg, - I32 minend, SV* sv, + SSize_t minend, SV* sv, void* data, U32 flags); Execute a regexp. The arguments are diff --git a/proto.h b/proto.h index 26b52a3..0bc3b55 100644 --- a/proto.h +++ b/proto.h @@ -3187,7 +3187,7 @@ PERL_CALLCONV REGEXP* Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags) #define PERL_ARGS_ASSERT_PREGCOMP \ assert(pattern) -PERL_CALLCONV I32 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave) +PERL_CALLCONV I32 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char* strend, char* strbeg, SSize_t minend, SV* screamer, U32 nosave) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3) @@ -3409,7 +3409,7 @@ PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r) #define PERL_ARGS_ASSERT_REGDUMP \ assert(r) -PERL_CALLCONV I32 Perl_regexec_flags(pTHX_ REGEXP *const rx, char *stringarg, char *strend, char *strbeg, I32 minend, SV *sv, void *data, U32 flags) +PERL_CALLCONV I32 Perl_regexec_flags(pTHX_ REGEXP *const rx, char *stringarg, char *strend, char *strbeg, SSize_t minend, SV *sv, void *data, U32 flags) 
__attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3) @@ -6766,14 +6766,14 @@ STATIC char * S_regwhite(struct RExC_state_t *pRExC_state, char *p) #define PERL_ARGS_ASSERT_REGWHITE \ assert(pRExC_state); assert(p) -STATIC void S_scan_commit(pTHX_ const struct RExC_state_t *pRExC_state, struct scan_data_t *data, I32 *minlenp, int is_inf) +STATIC void S_scan_commit(pTHX_ const struct RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); #define PERL_ARGS_ASSERT_SCAN_COMMIT \ assert(pRExC_state); assert(data); assert(minlenp) -STATIC SSize_t S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, I32 *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth) +STATIC SSize_t S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, struct regnode_charclass_class *and_withp, U32 flags, U32 depth) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3) @@ -6979,14 +6979,14 @@ STATIC CHECKPOINT S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxop #define PERL_ARGS_ASSERT_REGCPPUSH \ assert(rex) -STATIC U8* S_reghop3(U8 *s, I32 off, const U8 *lim) +STATIC U8* S_reghop3(U8 *s, SSize_t off, const U8 *lim) __attribute__warn_unused_result__ __attribute__nonnull__(1) __attribute__nonnull__(3); #define PERL_ARGS_ASSERT_REGHOP3 \ assert(s); assert(lim) -STATIC U8* S_reghopmaybe3(U8 *s, I32 off, const U8 *lim) +STATIC U8* S_reghopmaybe3(U8 *s, SSize_t off, const U8 *lim) __attribute__warn_unused_result__ __attribute__nonnull__(1) __attribute__nonnull__(3); @@ -7035,7 +7035,7 @@ STATIC void S_to_utf8_substr(pTHX_ regexp * prog) assert(prog) # if 
defined(XXX_dmq) -STATIC U8* S_reghop4(U8 *s, I32 off, const U8 *llim, const U8 *rlim) +STATIC U8* S_reghop4(U8 *s, SSize_t off, const U8 *llim, const U8 *rlim) __attribute__warn_unused_result__ __attribute__nonnull__(1) __attribute__nonnull__(3) diff --git a/regcomp.c b/regcomp.c index 1f89754..5a1e234 100644 --- a/regcomp.c +++ b/regcomp.c @@ -123,7 +123,7 @@ typedef struct RExC_state_t { char *start; /* Start of input for compile */ char *end; /* End of input for compile */ char *parse; /* Input-scan pointer. */ - I32 whilem_seen; /* number of WHILEM in this expr */ + SSize_t whilem_seen; /* number of WHILEM in this expr */ regnode *emit_start; /* Start of emitted-code area */ regnode *emit_bound; /* First regnode outside of the allocated space */ regnode *emit; /* Code-emit pointer; if = &emit_dummy, @@ -132,7 +132,7 @@ typedef struct RExC_state_t { I32 naughty; /* How bad is this pattern? */ I32 sawback; /* Did we see \1, ...? */ U32 seen; - I32 size; /* Code size. */ + SSize_t size; /* Code size. */ I32 npar; /* Capture buffer count, (OPEN). */ I32 cpar; /* Capture buffer count, (CLOSE). */ I32 nestroot; /* root parens we are in - used by accept */ @@ -301,7 +301,7 @@ typedef struct RExC_state_t { - max_offset Only used for floating strings. This is the rightmost point that - the string can appear at. If set to I32 max it indicates that the + the string can appear at. If set to SSize_t_MAX it indicates that the string can occur infinitely far to the right. - minlenp @@ -346,20 +346,20 @@ typedef struct scan_data_t { /*I32 len_min; unused */ /*I32 len_delta; unused */ SSize_t pos_min; - I32 pos_delta; + SSize_t pos_delta; SV *last_found; - I32 last_end; /* min value, <0 unless valid. */ + SSize_t last_end; /* min value, <0 unless valid. */ SSize_t last_start_min; - I32 last_start_max; + SSize_t last_start_max; SV **longest; /* Either &l_fixed, or &l_float. 
*/ SV *longest_fixed; /* longest fixed string found in pattern */ SSize_t offset_fixed; /* offset where it starts */ - I32 *minlen_fixed; /* pointer to the minlen relevant to the string */ + SSize_t *minlen_fixed; /* pointer to the minlen relevant to the string */ I32 lookbehind_fixed; /* is the position of the string modfied by LB */ SV *longest_float; /* longest floating string found in pattern */ SSize_t offset_float_min; /* earliest point in string it can appear */ - I32 offset_float_max; /* latest point in string it can appear */ - I32 *minlen_float; /* pointer to the minlen relevant to the string */ + SSize_t offset_float_max; /* latest point in string it can appear */ + SSize_t *minlen_float; /* pointer to the minlen relevant to the string */ SSize_t lookbehind_float; /* is the pos of the string modified by LB */ I32 flags; I32 whilem_c; @@ -748,7 +748,8 @@ DEBUG_OPTIMISE_MORE_r(if(data){ \ floating substrings if needed. */ STATIC void -S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *minlenp, int is_inf) +S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, + SSize_t *minlenp, int is_inf) { const STRLEN l = CHR_SVLEN(data->last_found); const STRLEN old_l = CHR_SVLEN(*data->longest); @@ -772,9 +773,12 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *min data->offset_float_min = l ? data->last_start_min : data->pos_min; data->offset_float_max = (l ? data->last_start_max - : (data->pos_delta == I32_MAX ? I32_MAX : data->pos_min + data->pos_delta)); - if (is_inf || (U32)data->offset_float_max > (U32)I32_MAX) - data->offset_float_max = I32_MAX; + : (data->pos_delta == SSize_t_MAX + ? 
SSize_t_MAX + : data->pos_min + data->pos_delta)); + if (is_inf + || (STRLEN)data->offset_float_max > (STRLEN)SSize_t_MAX) + data->offset_float_max = SSize_t_MAX; if (data->flags & SF_BEFORE_EOL) data->flags |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL); @@ -3030,7 +3034,7 @@ typedef struct scan_frame { STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, - I32 *minlenp, SSize_t *deltap, + SSize_t *minlenp, SSize_t *deltap, regnode *last, scan_data_t *data, I32 stopparen, @@ -3046,17 +3050,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */ { dVAR; - I32 min = 0; /* There must be at least this number of characters to match */ + /* There must be at least this number of characters to match */ + SSize_t min = 0; I32 pars = 0, code; regnode *scan = *scanp, *next; - I32 delta = 0; + SSize_t delta = 0; int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF); int is_inf_internal = 0; /* The studied chunk is infinite */ I32 is_par = OP(scan) == OPEN ? 
ARG(scan) : 0; scan_data_t data_fake; SV *re_trie_maxbuff = NULL; regnode *first_non_open = scan; - I32 stopmin = I32_MAX; + SSize_t stopmin = SSize_t_MAX; scan_frame *frame = NULL; GET_RE_DEBUG_FLAGS_DECL; @@ -3166,9 +3171,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, stopparen, recursed, NULL, f,depth+1); if (min1 > minnext) min1 = minnext; - if (deltanext == I32_MAX) { + if (deltanext == SSize_t_MAX) { is_inf = is_inf_internal = 1; - max1 = I32_MAX; + max1 = SSize_t_MAX; } else if (max1 < minnext + deltanext) max1 = minnext + deltanext; scan = next; @@ -3193,16 +3198,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, min1 = 0; if (flags & SCF_DO_SUBSTR) { data->pos_min += min1; - if (data->pos_delta >= I32_MAX - (max1 - min1)) - data->pos_delta = I32_MAX; + if (data->pos_delta >= SSize_t_MAX - (max1 - min1)) + data->pos_delta = SSize_t_MAX; else data->pos_delta += max1 - min1; if (max1 != min1 || is_inf) data->longest = &(data->longest_float); } min += min1; - if (delta == I32_MAX || I32_MAX - delta - (max1 - min1) < 0) - delta = I32_MAX; + if (delta == SSize_t_MAX + || SSize_t_MAX - delta - (max1 - min1) < 0) + delta = SSize_t_MAX; else delta += max1 - min1; if (flags & SCF_DO_STCLASS_OR) { @@ -3590,7 +3596,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } } else if (OP(scan) == EXACT) { - I32 l = STR_LEN(scan); + SSize_t l = STR_LEN(scan); UV uc; if (UTF) { const U8 * const s = (U8*)STRING(scan); @@ -3606,7 +3612,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (data->last_end == -1) { /* Update the start info. */ data->last_start_min = data->pos_min; data->last_start_max = is_inf - ? I32_MAX : data->pos_min + data->pos_delta; + ? 
SSize_t_MAX : data->pos_min + data->pos_delta; } sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan)); if (UTF) @@ -3796,8 +3802,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, flags &= ~SCF_DO_STCLASS; } else if (REGNODE_VARIES(OP(scan))) { - SSize_t mincount, maxcount, minnext, deltanext; - I32 fl = 0, f = flags, pos_before = 0; + SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0; + I32 fl = 0, f = flags; regnode * const oscan = scan; struct regnode_charclass_class this_class; struct regnode_charclass_class *oclass = NULL; @@ -3934,11 +3940,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } min += minnext * mincount; - is_inf_internal |= deltanext == I32_MAX + is_inf_internal |= deltanext == SSize_t_MAX || (maxcount == REG_INFTY && minnext + deltanext > 0); is_inf |= is_inf_internal; if (is_inf) - delta = I32_MAX; + delta = SSize_t_MAX; else delta += (minnext + deltanext) * maxcount - minnext * mincount; @@ -4068,10 +4074,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */ #if defined(SPARC64_GCC_WORKAROUND) - I32 b = 0; + SSize_t b = 0; STRLEN l = 0; const char *s = NULL; - I32 old = 0; + SSize_t old = 0; if (pos_before >= data->last_start_min) b = pos_before; @@ -4083,11 +4089,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, old = b - data->last_start_min; #else - I32 b = pos_before >= data->last_start_min + SSize_t b = pos_before >= data->last_start_min ? pos_before : data->last_start_min; STRLEN l; const char * const s = SvPV_const(data->last_found, l); - I32 old = b - data->last_start_min; + SSize_t old = b - data->last_start_min; #endif if (UTF) @@ -4119,20 +4125,26 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } else { /* start offset must point into the last copy */ data->last_start_min += minnext * (mincount - 1); - data->last_start_max += is_inf ? 
I32_MAX + data->last_start_max += is_inf ? SSize_t_MAX : (maxcount - 1) * (minnext + data->pos_delta); } } /* It is counted once already... */ data->pos_min += minnext * (mincount - counted); #if 0 -PerlIO_printf(Perl_debug_log, "counted=%d deltanext=%d I32_MAX=%d minnext=%d maxcount=%d mincount=%d\n", - counted, deltanext, I32_MAX, minnext, maxcount, mincount); -if (deltanext != I32_MAX) -PerlIO_printf(Perl_debug_log, "LHS=%d RHS=%d\n", -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount, I32_MAX - data->pos_delta); +PerlIO_printf(Perl_debug_log, "counted=%"UVdf" deltanext=%"UVdf + " SSize_t_MAX=%"UVdf" minnext=%"UVdf + " maxcount=%"UVdf" mincount=%"UVdf"\n", + (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount, + (UV)mincount); +if (deltanext != SSize_t_MAX) +PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n", + (UV)(-counted * deltanext + (minnext + deltanext) * maxcount + - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta)); #endif - if (deltanext == I32_MAX || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= I32_MAX - data->pos_delta) - data->pos_delta = I32_MAX; + if (deltanext == SSize_t_MAX || + -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= SSize_t_MAX - data->pos_delta) + data->pos_delta = SSize_t_MAX; else data->pos_delta += - counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount; @@ -4152,7 +4164,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RHS=%d\n", -counted * deltanext + (minnext data->last_start_min = data->pos_min - CHR_SVLEN(last_str); data->last_start_max = is_inf - ? I32_MAX + ? SSize_t_MAX : data->pos_min + data->pos_delta - CHR_SVLEN(last_str); } @@ -4443,7 +4455,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RHS=%d\n", -counted * deltanext + (minnext length of the pattern, something we won't know about until after the recurse. 
*/ - I32 deltanext, fake = 0; + SSize_t deltanext; + I32 fake = 0; regnode *nscan; struct regnode_charclass_class intrnl; int f = 0; @@ -4453,8 +4466,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RHS=%d\n", -counted * deltanext + (minnext have to worry about freeing them when we know they wont be used, which would be a pain. */ - I32 *minnextp; - Newx( minnextp, 1, I32 ); + SSize_t *minnextp; + Newx( minnextp, 1, SSize_t ); SAVEFREEPV(minnextp); if (data) { @@ -4583,7 +4596,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RHS=%d\n", -counted * deltanext + (minnext { if (!(RExC_rx->extflags & RXf_ANCH) && (flags & SCF_DO_SUBSTR)) RExC_rx->extflags |= RXf_ANCH_GPOS; - if (RExC_rx->gofs < (U32)min) + if (RExC_rx->gofs < (STRLEN)min) RExC_rx->gofs = min; } else { RExC_rx->extflags |= RXf_GPOS_FLOAT; @@ -4599,7 +4612,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RHS=%d\n", -counted * deltanext + (minnext regnode *trie_node= scan; regnode *tail= regnext(scan); reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ]; - I32 max1 = 0, min1 = I32_MAX; + SSize_t max1 = 0, min1 = SSize_t_MAX; struct regnode_charclass_class accum; if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? 
*/ @@ -4650,12 +4663,12 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RHS=%d\n", -counted * deltanext + (minnext if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) nextbranch= regnext((regnode*)nextbranch); - if (min1 > (I32)(minnext + trie->minlen)) + if (min1 > (SSize_t)(minnext + trie->minlen)) min1 = minnext + trie->minlen; - if (deltanext == I32_MAX) { + if (deltanext == SSize_t_MAX) { is_inf = is_inf_internal = 1; - max1 = I32_MAX; - } else if (max1 < (I32)(minnext + deltanext + trie->maxlen)) + max1 = SSize_t_MAX; + } else if (max1 < (SSize_t)(minnext + deltanext + trie->maxlen)) max1 = minnext + deltanext + trie->maxlen; if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) @@ -4749,9 +4762,9 @@ PerlIO_printf(Perl_debug_log, "LHS=%d RHS=%d\n", -counted * deltanext + (minnext DEBUG_STUDYDATA("pre-fin:",data,depth); *scanp = scan; - *deltap = is_inf_internal ? I32_MAX : delta; + *deltap = is_inf_internal ? SSize_t_MAX : delta; if (flags & SCF_DO_SUBSTR && is_inf) - data->pos_delta = I32_MAX - data->pos_min; + data->pos_delta = SSize_t_MAX - data->pos_min; if (is_par > (I32)U8_MAX) is_par = 0; if (is_par && pars==1 && data) { @@ -5419,13 +5432,15 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, STATIC bool -S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest, SV** rx_utf8, SV** rx_substr, I32* rx_end_shift, I32 lookbehind, I32 offset, I32 *minlen, STRLEN longest_length, bool eol, bool meol) +S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest, SV** rx_utf8, SV** rx_substr, SSize_t* rx_end_shift, + SSize_t lookbehind, SSize_t offset, SSize_t *minlen, STRLEN longest_length, bool eol, bool meol) { /* This is the common code for setting up the floating and fixed length * string data extracted from Perl_re_op_compile() below. Returns a boolean * as to whether succeeded or not */ - I32 t,ml; + I32 t; + SSize_t ml; if (! 
(longest_length || (eol /* Can't have SEOL and MULTI */ @@ -5450,7 +5465,7 @@ S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest, SV** rx_utf8, S follow this item. We calculate it ahead of time as once the lookbehind offset is added in we lose the ability to correctly calculate it.*/ - ml = minlen ? *(minlen) : (I32)longest_length; + ml = minlen ? *(minlen) : (SSize_t)longest_length; *rx_end_shift = ml - offset - longest_length + (SvTAIL(sv_longest) != 0) + lookbehind; @@ -5519,7 +5534,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, char *exp; regnode *scan; I32 flags; - I32 minlen = 0; + SSize_t minlen = 0; U32 rx_flags; SV *pat; SV *code_blocksv = NULL; @@ -6274,7 +6289,7 @@ reStudy: { r->float_min_offset = data.offset_float_min - data.lookbehind_float; r->float_max_offset = data.offset_float_max; - if (data.offset_float_max < I32_MAX) /* Don't offset infinity */ + if (data.offset_float_max < SSize_t_MAX) /* Don't offset infinity */ r->float_max_offset -= data.lookbehind_float; SvREFCNT_inc_simple_void_NN(data.longest_float); } @@ -6477,8 +6492,8 @@ reStudy: }); #ifdef RE_TRACK_PATTERN_OFFSETS DEBUG_OFFSETS_r(if (ri->u.offsets) { - const U32 len = ri->u.offsets[0]; - U32 i; + const STRLEN len = ri->u.offsets[0]; + STRLEN i; GET_RE_DEBUG_FLAGS_DECL; PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)ri->u.offsets[0]); for (i = 1; i <= len; i++) { @@ -6666,7 +6681,7 @@ Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags) { SV *ret; AV *av; - I32 length; + SSize_t length; struct regexp *const rx = ReANY(r); PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR; @@ -12266,7 +12281,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in case we need to change the emitted regop to an EXACT. 
*/ const char * orig_parse = RExC_parse; - const I32 orig_size = RExC_size; + const SSize_t orig_size = RExC_size; GET_RE_DEBUG_FLAGS_DECL; PERL_ARGS_ASSERT_REGCLASS; diff --git a/regexec.c b/regexec.c index eaabe8c..6a77019 100644 --- a/regexec.c +++ b/regexec.c @@ -369,7 +369,7 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p) ); paren = *maxopenparen_p; for ( ; i > 0; i -= REGCP_PAREN_ELEMS) { - I32 tmps; + SSize_t tmps; rex->offs[paren].start_tmp = SSPOPINT; rex->offs[paren].start = SSPOPIV; tmps = SSPOPIV; @@ -526,7 +526,7 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character) */ I32 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char *strend, - char *strbeg, I32 minend, SV *screamer, U32 nosave) + char *strbeg, SSize_t minend, SV *screamer, U32 nosave) /* stringarg: the point in the string at which to begin matching */ /* strend: pointer to null at end of string */ /* strbeg: real beginning of string */ @@ -617,7 +617,7 @@ Perl_re_intuit_start(pTHX_ struct regexp *const prog = ReANY(rx); SSize_t start_shift = 0; /* Should be nonnegative! */ - I32 end_shift = 0; + SSize_t end_shift = 0; char *s; SV *check; char *t; @@ -687,7 +687,7 @@ Perl_re_intuit_start(pTHX_ See [perl #115242] */ { /* Substring at constant offset from beg-of-str... 
*/ - I32 slen; + SSize_t slen; s = HOP3c(strpos, prog->check_offset_min, strend); @@ -723,9 +723,9 @@ Perl_re_intuit_start(pTHX_ end_shift = prog->check_end_shift; if (!ml_anch) { - const I32 end = prog->check_offset_max + CHR_SVLEN(check) + const SSize_t end = prog->check_offset_max + CHR_SVLEN(check) - (SvTAIL(check) != 0); - const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end; + const SSize_t eshift = CHR_DIST((U8*)strend, (U8*)s) - end; if (end_shift < eshift) end_shift = eshift; @@ -752,7 +752,7 @@ Perl_re_intuit_start(pTHX_ { SSize_t srch_start_shift = start_shift; - I32 srch_end_shift = end_shift; + SSize_t srch_end_shift = end_shift; U8* start_point; U8* end_point; if (srch_start_shift < 0 && strbeg - s > srch_start_shift) { @@ -1555,7 +1555,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, * characters, and there are only 2 availabe, we know without * trying that it will fail; so don't start a match past the * required minimum number from the far end */ - e = HOP3c(strend, -((I32)ln), s); + e = HOP3c(strend, -((SSize_t)ln), s); if (reginfo->intuit && e < s) { e = s; /* Due to minlen logic of intuit() */ @@ -1601,7 +1601,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, * only 2 are left, it's guaranteed to fail, so don't start a * match that would require us to go beyond the end of the string */ - e = HOP3c(strend, -((I32)lnc), s); + e = HOP3c(strend, -((SSize_t)lnc), s); if (reginfo->intuit && e < s) { e = s; /* Due to minlen logic of intuit() */ @@ -2099,7 +2099,7 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx, { SSize_t min = 0; SSize_t max = strend - strbeg; - I32 sublen; + SSize_t sublen; if ( (flags & REXEC_COPY_SKIP_POST) && !(prog->extflags & RXf_PMf_KEEPCOPY) /* //p */ @@ -2179,7 +2179,8 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx, * $x = "\x{100}" x 1E6; 1 while $x =~ /(.)/g; * from going quadratic */ if (SvPOKp(sv) && SvPVX(sv) == strbeg) - sv_pos_b2u(sv, &(prog->subcoffset)); + 
prog->subcoffset = sv_pos_b2u_flags(sv, prog->subcoffset, + SV_GMAGIC|SV_CONST_RETURN); else prog->subcoffset = utf8_length((U8*)strbeg, (U8*)(strbeg+prog->suboffset)); @@ -2202,7 +2203,7 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx, */ I32 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, - char *strbeg, I32 minend, SV *sv, void *data, U32 flags) + char *strbeg, SSize_t minend, SV *sv, void *data, U32 flags) /* stringarg: the point in the string at which to begin matching */ /* strend: pointer to null at end of string */ /* strbeg: real beginning of string */ @@ -2219,8 +2220,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, char *s; regnode *c; char *startpos; - I32 minlen; /* must match at least this many chars */ - I32 dontbother = 0; /* how many characters not to try at end */ + SSize_t minlen; /* must match at least this many chars */ + SSize_t dontbother = 0; /* how many characters not to try at end */ const bool utf8_target = cBOOL(DO_UTF8(sv)); I32 multiline; RXi_GET_DECL(prog,progi); @@ -2603,8 +2604,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, || ((prog->float_substr != NULL || prog->float_utf8 != NULL) && prog->float_max_offset < strend - s)) { SV *must; - I32 back_max; - I32 back_min; + SSize_t back_max; + SSize_t back_min; char *last; char *last1; /* Last position checked before */ #ifdef DEBUGGING @@ -2649,7 +2650,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, last = strend; } else { last = HOP3c(strend, /* Cannot start after this */ - -(I32)(CHR_SVLEN(must) + -(SSize_t)(CHR_SVLEN(must) - (SvTAIL(must) != 0) + back_min), strbeg); } if (s > reginfo->strbeg) @@ -3601,7 +3602,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) regnode *scan; regnode *next; U32 n = 0; /* general value; init to avoid compiler warning */ - I32 ln = 0; /* len or last; init to avoid compiler warning */ + SSize_t ln = 0; /* 
len or last; init to avoid compiler warning */ char *locinput = startpos; char *pushinput; /* where to continue after a PUSH */ I32 nextchr; /* is always set to UCHARAT(locinput) */ @@ -5558,10 +5559,10 @@ NULL if (reginfo->poscache_iter-- == 0) { /* initialise cache */ - const I32 size = (reginfo->poscache_maxiter + 7)/8; + const SSize_t size = (reginfo->poscache_maxiter + 7)/8; regmatch_info_aux *const aux = reginfo->info_aux; if (aux->poscache) { - if ((I32)reginfo->poscache_size < size) { + if ((SSize_t)reginfo->poscache_size < size) { Renew(aux->poscache, size, char); reginfo->poscache_size = size; } @@ -5579,7 +5580,7 @@ NULL if (reginfo->poscache_iter < 0) { /* have we already failed at this position? */ - I32 offset, mask; + SSize_t offset, mask; reginfo->poscache_iter = -1; /* stop eventual underflow */ offset = (scan->flags & 0xf) - 1 @@ -7519,7 +7520,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const } STATIC U8 * -S_reghop3(U8 *s, I32 off, const U8* lim) +S_reghop3(U8 *s, SSize_t off, const U8* lim) { /* return the position 'off' UTF-8 characters away from 's', forward if * 'off' >= 0, backwards if negative. 
But don't go outside of position @@ -7554,7 +7555,7 @@ S_reghop3(U8 *s, I32 off, const U8* lim) we ifdef it out - dmq */ STATIC U8 * -S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim) +S_reghop4(U8 *s, SSize_t off, const U8* llim, const U8* rlim) { dVAR; @@ -7581,7 +7582,7 @@ S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim) #endif STATIC U8 * -S_reghopmaybe3(U8* s, I32 off, const U8* lim) +S_reghopmaybe3(U8* s, SSize_t off, const U8* lim) { dVAR; diff --git a/regexp.h b/regexp.h index 8542cb1..928a374 100644 --- a/regexp.h +++ b/regexp.h @@ -40,7 +40,7 @@ struct reg_substr_datum { SSize_t max_offset; SV *substr; /* non-utf8 variant */ SV *utf8_substr; /* utf8 variant */ - I32 end_shift; + SSize_t end_shift; }; struct reg_substr_data { struct reg_substr_datum data[3]; /* Actual array */ @@ -63,7 +63,7 @@ typedef struct regexp_paren_pair { * "abc" =~ /(.(?{print "[$1]"}))+/ *outputs [][a][b] * This field is not part of the API. */ - I32 start_tmp; + SSize_t start_tmp; } regexp_paren_pair; #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) @@ -104,9 +104,9 @@ struct reg_code_block { /* Information about the match that the perl core uses to */ \ /* manage things */ \ U32 extflags; /* Flags used both externally and internally */ \ - I32 minlen; /* mininum possible number of chars in string to match */\ - I32 minlenret; /* mininum possible number of chars in $& */ \ - U32 gofs; /* chars left of pos that we search from */ \ + SSize_t minlen; /* mininum possible number of chars in string to match */\ + SSize_t minlenret; /* mininum possible number of chars in $& */ \ + STRLEN gofs; /* chars left of pos that we search from */ \ /* substring data about strings that must appear in the */ \ /* final match, used for optimisations */ \ struct reg_substr_data *substrs; \ @@ -125,8 +125,8 @@ struct reg_code_block { char *subbeg; \ SV_SAVED_COPY /* If non-NULL, SV which is COW from original */\ SSize_t sublen; /* Length of string pointed by subbeg */ \ - I32 
suboffset; /* byte offset of subbeg from logical start of str */ \ - I32 subcoffset; /* suboffset equiv, but in chars (for @-/@+) */ \ + SSize_t suboffset; /* byte offset of subbeg from logical start of str */ \ + SSize_t subcoffset; /* suboffset equiv, but in chars (for @-/@+) */ \ /* Information about the match that isn't often used */ \ /* offset from wrapped to the start of precomp */ \ PERL_BITFIELD32 pre_prefix:4; \ @@ -146,7 +146,7 @@ typedef struct regexp { typedef struct re_scream_pos_data_s { char **scream_olds; /* match pos */ - I32 *scream_pos; /* Internal iterator of scream. */ + SSize_t *scream_pos; /* Internal iterator of scream. */ } re_scream_pos_data; /* regexp_engine structure. This is the dispatch table for regexes. @@ -155,7 +155,7 @@ typedef struct re_scream_pos_data_s typedef struct regexp_engine { REGEXP* (*comp) (pTHX_ SV * const pattern, U32 flags); I32 (*exec) (pTHX_ REGEXP * const rx, char* stringarg, char* strend, - char* strbeg, I32 minend, SV* sv, + char* strbeg, SSize_t minend, SV* sv, void* data, U32 flags); char* (*intuit) (pTHX_ REGEXP * const rx, @@ -606,7 +606,7 @@ typedef struct { STRLEN suboffset; /* saved suboffset field from rex */ STRLEN subcoffset; /* saved subcoffset field from rex */ MAGIC *pos_magic; /* pos() magic attached to $_ */ - I32 pos; /* the original value of pos() in pos_magic */ + SSize_t pos; /* the original value of pos() in pos_magic */ U8 pos_flags; /* flags to be restored; currently only MGf_BYTES*/ } regmatch_info_aux_eval; -- 2.7.4