From 84da74a7adb7db2354917b83df794f4983438fcd Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Sun, 17 Sep 2006 16:57:57 +0200 Subject: [PATCH] Add hook for re_dup() into regex engine as reg_dupe (make re Message-ID: <9b18b3110609170557r73d94c18v90285bd57a38b876@mail.gmail.com> Date: Sun, 17 Sep 2006 14:57:57 +0200 p4raw-id: //depot/perl@28891 --- embed.fnc | 1 + embed.h | 2 + embedvar.h | 2 + ext/re/re.xs | 9 +++- ext/re/re_top.h | 15 +++--- global.sym | 1 + perl.h | 2 + perlapi.h | 2 + proto.h | 4 ++ regcomp.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sv.c | 146 ++++++------------------------------------------------ thrdvar.h | 4 ++ 12 files changed, 199 insertions(+), 139 deletions(-) diff --git a/embed.fnc b/embed.fnc index 7320b9f..5755f06 100644 --- a/embed.fnc +++ b/embed.fnc @@ -668,6 +668,7 @@ Ap |I32 |pregexec |NN regexp* prog|NN char* stringarg \ |NN char* strend|NN char* strbeg|I32 minend \ |NN SV* screamer|U32 nosave Ap |void |pregfree |NULLOK struct regexp* r +Ap |regexp*|regdupe |NN const regexp* r|NN CLONE_PARAMS* param Ap |regexp*|pregcomp |NN char* exp|NN char* xend|NN PMOP* pm Ap |char* |re_intuit_start|NN regexp* prog|NULLOK SV* sv|NN char* strpos \ |NN char* strend|U32 flags \ diff --git a/embed.h b/embed.h index 4ae5706..0ec1775 100644 --- a/embed.h +++ b/embed.h @@ -680,6 +680,7 @@ #define regclass_swash Perl_regclass_swash #define pregexec Perl_pregexec #define pregfree Perl_pregfree +#define regdupe Perl_regdupe #define pregcomp Perl_pregcomp #define re_intuit_start Perl_re_intuit_start #define re_intuit_string Perl_re_intuit_string @@ -2876,6 +2877,7 @@ #define regclass_swash(a,b,c,d,e) Perl_regclass_swash(aTHX_ a,b,c,d,e) #define pregexec(a,b,c,d,e,f,g) Perl_pregexec(aTHX_ a,b,c,d,e,f,g) #define pregfree(a) Perl_pregfree(aTHX_ a) +#define regdupe(a,b) Perl_regdupe(aTHX_ a,b) #define pregcomp(a,b,c) Perl_pregcomp(aTHX_ a,b,c) #define re_intuit_start(a,b,c,d,e,f) Perl_re_intuit_start(aTHX_ a,b,c,d,e,f) #define re_intuit_string(a) Perl_re_intuit_string(aTHX_ a) diff --git a/embedvar.h b/embedvar.h index 9691e53..b387238 100644 --- a/embedvar.h +++ b/embedvar.h @@ -84,6 +84,7 @@ #define PL_reg_state (vTHX->Treg_state) #define PL_regcompp (vTHX->Tregcompp) #define PL_regdummy (vTHX->Tregdummy) +#define PL_regdupe (vTHX->Tregdupe) #define PL_regexecp (vTHX->Tregexecp) #define PL_regfree (vTHX->Tregfree) #define PL_regint_start (vTHX->Tregint_start) @@ -757,6 +758,7 @@ #define PL_Treg_state PL_reg_state #define PL_Tregcompp PL_regcompp #define PL_Tregdummy PL_regdummy +#define PL_Tregdupe PL_regdupe #define PL_Tregexecp PL_regexecp #define PL_Tregfree PL_regfree #define PL_Tregint_start PL_regint_start diff --git a/ext/re/re.xs b/ext/re/re.xs index 0a90f9f..7fad146 100644 --- a/ext/re/re.xs +++ b/ext/re/re.xs @@ -19,6 +19,9 @@ extern char* my_re_intuit_start (pTHX_ regexp *prog, SV *sv, char *strpos, struct re_scream_pos_data_s *data); extern SV* my_re_intuit_string (pTHX_ regexp *prog); +extern regexp* my_regdupe (pTHX_ regexp *r, CLONE_PARAMS *param); + + END_EXTERN_C /* engine details need to be paired - non debugging, debuggin */ @@ -33,13 +36,14 @@ struct regexp_engine { struct re_scream_pos_data_s *data); SV* (*re_intuit_string) (pTHX_ regexp *prog); void (*regfree) (pTHX_ struct regexp* r); + regexp* (*regdupe) (pTHX_ regexp *r, CLONE_PARAMS *param); }; struct regexp_engine engines[] = { { Perl_pregcomp, Perl_regexec_flags, Perl_re_intuit_start, - Perl_re_intuit_string, Perl_pregfree }, + Perl_re_intuit_string, Perl_pregfree, Perl_regdupe }, { my_regcomp, my_regexec, my_re_intuit_start, my_re_intuit_string, - my_regfree } + my_regfree, my_regdupe } }; #define MY_CXT_KEY "re::_guts" XS_VERSION @@ -72,6 +76,7 @@ install(pTHX_ unsigned int new_state) PL_regint_start = engines[new_state].re_intuit_start; PL_regint_string = engines[new_state].re_intuit_string; PL_regfree = engines[new_state].regfree; + PL_regdupe = engines[new_state].regdupe; if (new_state & NEEDS_DEBUGGING) { PL_colorset = 0; /* Allow reinspection of ENV. */ diff --git a/ext/re/re_top.h b/ext/re/re_top.h index 5964672..af729ae 100644 --- a/ext/re/re_top.h +++ b/ext/re/re_top.h @@ -8,13 +8,14 @@ #endif /* We *really* need to overwrite these symbols: */ -#define Perl_regexec_flags my_regexec -#define Perl_regdump my_regdump -#define Perl_regprop my_regprop -#define Perl_re_intuit_start my_re_intuit_start -#define Perl_pregcomp my_regcomp -#define Perl_pregfree my_regfree -#define Perl_re_intuit_string my_re_intuit_string +#define Perl_regexec_flags my_regexec +#define Perl_regdump my_regdump +#define Perl_regprop my_regprop +#define Perl_re_intuit_start my_re_intuit_start +#define Perl_pregcomp my_regcomp +#define Perl_pregfree my_regfree +#define Perl_re_intuit_string my_re_intuit_string +#define Perl_regdupe my_regdupe #define PERL_NO_GET_CONTEXT diff --git a/global.sym b/global.sym index 3b4b4e5..b33fded 100644 --- a/global.sym +++ b/global.sym @@ -386,6 +386,7 @@ Perl_regdump Perl_regclass_swash Perl_pregexec Perl_pregfree +Perl_regdupe Perl_pregcomp Perl_re_intuit_start Perl_re_intuit_string diff --git a/perl.h b/perl.h index 0f71630..b4cd6fe 100644 --- a/perl.h +++ b/perl.h @@ -200,6 +200,7 @@ #define CALLREG_INTUIT_START CALL_FPTR(PL_regint_start) #define CALLREG_INTUIT_STRING CALL_FPTR(PL_regint_string) #define CALLREGFREE CALL_FPTR(PL_regfree) +#define CALLREGDUPE CALL_FPTR(PL_regdupe) /* * Because of backward compatibility reasons the PERL_UNUSED_DECL @@ -4327,6 +4328,7 @@ typedef char* (CPERLscope(*re_intuit_start_t)) (pTHX_ regexp *prog, SV *sv, struct re_scream_pos_data_s *d); typedef SV* (CPERLscope(*re_intuit_string_t)) (pTHX_ regexp *prog); typedef void (CPERLscope(*regfree_t)) (pTHX_ struct regexp* r); +typedef regexp*(CPERLscope(*regdupe_t)) (pTHX_ const regexp* r, CLONE_PARAMS *param); typedef void (*DESTRUCTORFUNC_NOCONTEXT_t) (void*); typedef void (*DESTRUCTORFUNC_t) (pTHX_ void*); diff --git a/perlapi.h b/perlapi.h index aac1e16..f5b8d12 100644 --- a/perlapi.h +++ b/perlapi.h @@ -788,6 +788,8 @@ END_EXTERN_C #define PL_regcompp (*Perl_Tregcompp_ptr(aTHX)) #undef PL_regdummy #define PL_regdummy (*Perl_Tregdummy_ptr(aTHX)) +#undef PL_regdupe +#define PL_regdupe (*Perl_Tregdupe_ptr(aTHX)) #undef PL_regexecp #define PL_regexecp (*Perl_Tregexecp_ptr(aTHX)) #undef PL_regfree diff --git a/proto.h b/proto.h index 87daeeb..386f4ab 100644 --- a/proto.h +++ b/proto.h @@ -1833,6 +1833,10 @@ PERL_CALLCONV I32 Perl_pregexec(pTHX_ regexp* prog, char* stringarg, char* stren __attribute__nonnull__(pTHX_6); PERL_CALLCONV void Perl_pregfree(pTHX_ struct regexp* r); +PERL_CALLCONV regexp* Perl_regdupe(pTHX_ const regexp* r, CLONE_PARAMS* param) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); + PERL_CALLCONV regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, PMOP* pm) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) diff --git a/regcomp.c b/regcomp.c index 4684646..6c1e574 100644 --- a/regcomp.c +++ b/regcomp.c @@ -7515,6 +7515,12 @@ Perl_re_intuit_string(pTHX_ regexp *prog) return prog->check_substr ? prog->check_substr : prog->check_utf8; } +/* + pregfree - free a regexp + + See regdupe below if you change anything here. +*/ + void Perl_pregfree(pTHX_ struct regexp *r) { @@ -7657,6 +7663,150 @@ Perl_pregfree(pTHX_ struct regexp *r) Safefree(r); } +#define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t)) +#define av_dup_inc(s,t) (AV*)SvREFCNT_inc(sv_dup((SV*)s,t)) +#define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL) + +/* + regdupe - duplicate a regexp. + + This routine is called by sv.c's re_dup and is expected to clone a + given regexp structure. It is a no-op when not under USE_ITHREADS. + (Originally this *was* re_dup() for change history see sv.c) + + See pregfree() above if you change anything here. +*/ + +regexp * +Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) +{ +#if defined(USE_ITHREADS) + dVAR; + REGEXP *ret; + int i, len, npar; + struct reg_substr_datum *s; + + if (!r) + return (REGEXP *)NULL; + + if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r))) + return ret; + + len = r->offsets[0]; + npar = r->nparens+1; + + Newxc(ret, sizeof(regexp) + (len+1)*sizeof(regnode), char, regexp); + Copy(r->program, ret->program, len+1, regnode); + + Newx(ret->startp, npar, I32); + Copy(r->startp, ret->startp, npar, I32); + Newx(ret->endp, npar, I32); + Copy(r->startp, ret->startp, npar, I32); + + Newx(ret->substrs, 1, struct reg_substr_data); + for (s = ret->substrs->data, i = 0; i < 3; i++, s++) { + s->min_offset = r->substrs->data[i].min_offset; + s->max_offset = r->substrs->data[i].max_offset; + s->end_shift = r->substrs->data[i].end_shift; + s->substr = sv_dup_inc(r->substrs->data[i].substr, param); + s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param); + } + + ret->regstclass = NULL; + if (r->data) { + struct reg_data *d; + const int count = r->data->count; + int i; + + Newxc(d, sizeof(struct reg_data) + count*sizeof(void *), + char, struct reg_data); + Newx(d->what, count, U8); + + d->count = count; + for (i = 0; i < count; i++) { + d->what[i] = r->data->what[i]; + switch (d->what[i]) { + /* legal options are one of: sfpont + see also regcomp.h and pregfree() */ + case 's': + d->data[i] = sv_dup_inc((SV *)r->data->data[i], param); + break; + case 'p': + d->data[i] = av_dup_inc((AV *)r->data->data[i], param); + break; + case 'f': + /* This is cheating. */ + Newx(d->data[i], 1, struct regnode_charclass_class); + StructCopy(r->data->data[i], d->data[i], + struct regnode_charclass_class); + ret->regstclass = (regnode*)d->data[i]; + break; + case 'o': + /* Compiled op trees are readonly, and can thus be + shared without duplication. */ + OP_REFCNT_LOCK; + d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]); + OP_REFCNT_UNLOCK; + break; + case 'n': + d->data[i] = r->data->data[i]; + break; + case 't': + d->data[i] = r->data->data[i]; + OP_REFCNT_LOCK; + ((reg_trie_data*)d->data[i])->refcount++; + OP_REFCNT_UNLOCK; + break; + case 'T': + d->data[i] = r->data->data[i]; + OP_REFCNT_LOCK; + ((reg_ac_data*)d->data[i])->refcount++; + OP_REFCNT_UNLOCK; + /* Trie stclasses are readonly and can thus be shared + * without duplication. We free the stclass in pregfree + * when the corresponding reg_ac_data struct is freed. + */ + ret->regstclass= r->regstclass; + break; + default: + Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", r->data->what[i]); + } + } + + ret->data = d; + } + else + ret->data = NULL; + + Newx(ret->offsets, 2*len+1, U32); + Copy(r->offsets, ret->offsets, 2*len+1, U32); + + ret->precomp = SAVEPVN(r->precomp, r->prelen); + ret->refcnt = r->refcnt; + ret->minlen = r->minlen; + ret->prelen = r->prelen; + ret->nparens = r->nparens; + ret->lastparen = r->lastparen; + ret->lastcloseparen = r->lastcloseparen; + ret->reganch = r->reganch; + + ret->sublen = r->sublen; + + if (RX_MATCH_COPIED(ret)) + ret->subbeg = SAVEPVN(r->subbeg, r->sublen); + else + ret->subbeg = NULL; +#ifdef PERL_OLD_COPY_ON_WRITE + ret->saved_copy = NULL; +#endif + + ptr_table_store(PL_ptr_table, r, ret); + return ret; +#else + return NULL; +#endif +} + #ifndef PERL_IN_XSUB_RE /* - regnext - dig the "next" pointer out of a node diff --git a/sv.c b/sv.c index 1112f21..7d7d234 100644 --- a/sv.c +++ b/sv.c @@ -9483,127 +9483,7 @@ ptr_table_* functions. REGEXP * Perl_re_dup(pTHX_ const REGEXP *r, CLONE_PARAMS *param) { - dVAR; - REGEXP *ret; - int i, len, npar; - struct reg_substr_datum *s; - - if (!r) - return (REGEXP *)NULL; - - if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r))) - return ret; - - len = r->offsets[0]; - npar = r->nparens+1; - - Newxc(ret, sizeof(regexp) + (len+1)*sizeof(regnode), char, regexp); - Copy(r->program, ret->program, len+1, regnode); - - Newx(ret->startp, npar, I32); - Copy(r->startp, ret->startp, npar, I32); - Newx(ret->endp, npar, I32); - Copy(r->startp, ret->startp, npar, I32); - - Newx(ret->substrs, 1, struct reg_substr_data); - for (s = ret->substrs->data, i = 0; i < 3; i++, s++) { - s->min_offset = r->substrs->data[i].min_offset; - s->max_offset = r->substrs->data[i].max_offset; - s->end_shift = r->substrs->data[i].end_shift; - s->substr = sv_dup_inc(r->substrs->data[i].substr, param); - s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param); - } - - ret->regstclass = NULL; - if (r->data) { - struct reg_data *d; - const int count = r->data->count; - int i; - - Newxc(d, sizeof(struct reg_data) + count*sizeof(void *), - char, struct reg_data); - Newx(d->what, count, U8); - - d->count = count; - for (i = 0; i < count; i++) { - d->what[i] = r->data->what[i]; - switch (d->what[i]) { - /* legal options are one of: sfpont - see also regcomp.h and pregfree() */ - case 's': - d->data[i] = sv_dup_inc((SV *)r->data->data[i], param); - break; - case 'p': - d->data[i] = av_dup_inc((AV *)r->data->data[i], param); - break; - case 'f': - /* This is cheating. */ - Newx(d->data[i], 1, struct regnode_charclass_class); - StructCopy(r->data->data[i], d->data[i], - struct regnode_charclass_class); - ret->regstclass = (regnode*)d->data[i]; - break; - case 'o': - /* Compiled op trees are readonly, and can thus be - shared without duplication. */ - OP_REFCNT_LOCK; - d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]); - OP_REFCNT_UNLOCK; - break; - case 'n': - d->data[i] = r->data->data[i]; - break; - case 't': - d->data[i] = r->data->data[i]; - OP_REFCNT_LOCK; - ((reg_trie_data*)d->data[i])->refcount++; - OP_REFCNT_UNLOCK; - break; - case 'T': - d->data[i] = r->data->data[i]; - OP_REFCNT_LOCK; - ((reg_ac_data*)d->data[i])->refcount++; - OP_REFCNT_UNLOCK; - /* Trie stclasses are readonly and can thus be shared - * without duplication. We free the stclass in pregfree - * when the corresponding reg_ac_data struct is freed. - */ - ret->regstclass= r->regstclass; - break; - default: - Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", r->data->what[i]); - } - } - - ret->data = d; - } - else - ret->data = NULL; - - Newx(ret->offsets, 2*len+1, U32); - Copy(r->offsets, ret->offsets, 2*len+1, U32); - - ret->precomp = SAVEPVN(r->precomp, r->prelen); - ret->refcnt = r->refcnt; - ret->minlen = r->minlen; - ret->prelen = r->prelen; - ret->nparens = r->nparens; - ret->lastparen = r->lastparen; - ret->lastcloseparen = r->lastcloseparen; - ret->reganch = r->reganch; - - ret->sublen = r->sublen; - - if (RX_MATCH_COPIED(ret)) - ret->subbeg = SAVEPVN(r->subbeg, r->sublen); - else - ret->subbeg = NULL; -#ifdef PERL_OLD_COPY_ON_WRITE - ret->saved_copy = NULL; -#endif - - ptr_table_store(PL_ptr_table, r, ret); - return ret; + return CALLREGDUPE(aTHX_ r,param); } /* duplicate a file handle */ @@ -11060,6 +10940,20 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, sv_setpvn(PERL_DEBUG_PAD(1), "", 0); /* ext/re needs these */ sv_setpvn(PERL_DEBUG_PAD(2), "", 0); /* even without DEBUGGING. */ + + /* RE engine - function pointers -- must initilize these before + re_dup() is called. dmq. */ + PL_regcompp = proto_perl->Tregcompp; + PL_regexecp = proto_perl->Tregexecp; + PL_regint_start = proto_perl->Tregint_start; + PL_regint_string = proto_perl->Tregint_string; + PL_regfree = proto_perl->Tregfree; + PL_regdupe = proto_perl->Tregdupe; + + Zero(&PL_reg_state, 1, struct re_save_state); + PL_reginterp_cnt = 0; + PL_regmatch_slab = NULL; + /* Clone the regex array */ PL_regex_padav = newAV(); { @@ -11558,15 +11452,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_colorset = 0; /* reinits PL_colors[] */ /*PL_colors[6] = {0,0,0,0,0,0};*/ - /* RE engine - function pointers */ - PL_regcompp = proto_perl->Tregcompp; - PL_regexecp = proto_perl->Tregexecp; - PL_regint_start = proto_perl->Tregint_start; - PL_regint_string = proto_perl->Tregint_string; - PL_regfree = proto_perl->Tregfree; - Zero(&PL_reg_state, 1, struct re_save_state); - PL_reginterp_cnt = 0; - PL_regmatch_slab = NULL; + /* Pluggable optimizer */ PL_peepp = proto_perl->Tpeepp; diff --git a/thrdvar.h b/thrdvar.h index 581d60f..ead3278 100644 --- a/thrdvar.h +++ b/thrdvar.h @@ -176,6 +176,10 @@ PERLVARI(Tregint_string,re_intuit_string_t, MEMBER_TO_FPTR(Perl_re_intuit_string PERLVARI(Tregfree, regfree_t, MEMBER_TO_FPTR(Perl_pregfree)) /* Pointer to REx free()er */ +PERLVARI(Tregdupe, regdupe_t, MEMBER_TO_FPTR(Perl_regdupe)) + /* Pointer to REx dupe()er */ + + PERLVARI(Treginterp_cnt,int, 0) /* Whether "Regexp" was interpolated. */ PERLVARI(Twatchaddr, char **, 0) PERLVAR(Twatchok, char *) -- 2.7.4