From 5c35adbb8fc4c988807097c4d379e2485ada5865 Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Fri, 28 Dec 2007 09:59:06 +0000 Subject: [PATCH] First class regexps. p4raw-id: //depot/perl@32751 --- dump.c | 4 ++-- ext/B/B.pm | 2 +- ext/B/B.xs | 30 +++++++++++++++++++++++++++++- ext/B/t/b.t | 7 +++++-- ext/B/typemap | 1 + ext/Devel/Peek/t/Peek.t | 13 +++---------- lib/overload.t | 2 +- pp_ctl.c | 30 +++++++++++++----------------- pp_hot.c | 6 +++--- regcomp.c | 2 +- regexec.c | 24 ++++++++++++++++++------ sv.c | 35 ++++++++++++++++++++--------------- sv.h | 33 ++++++++++++++++++++++++++++++++- t/op/qr.t | 9 +-------- universal.c | 8 -------- util.c | 6 ++---- 16 files changed, 132 insertions(+), 80 deletions(-) diff --git a/dump.c b/dump.c index 1cda173..42cacb8 100644 --- a/dump.c +++ b/dump.c @@ -36,7 +36,7 @@ static const char* const svtypenames[SVt_LAST] = { "PVIV", "PVNV", "PVMG", - "ORANGE", + "REGEXP", "PVGV", "PVLV", "PVAV", @@ -56,7 +56,7 @@ static const char* const svshorttypenames[SVt_LAST] = { "PVIV", "PVNV", "PVMG", - "ORANGE", + "REGEXP", "GV", "PVLV", "AV", diff --git a/ext/B/B.pm b/ext/B/B.pm index 7c498e4..3e5e8ab 100644 --- a/ext/B/B.pm +++ b/ext/B/B.pm @@ -38,7 +38,7 @@ use strict; @B::PVIV::ISA = qw(B::PV B::IV); @B::PVNV::ISA = qw(B::PVIV B::NV); @B::PVMG::ISA = 'B::PVNV'; -@B::ORANGE::ISA = 'B::PVMG' if $] >= 5.011; +@B::REGEXP::ISA = 'B::PVMG' if $] >= 5.011; # Change in the inheritance hierarchy post 5.9.0 @B::PVLV::ISA = $] > 5.009 ? 'B::GV' : 'B::PVMG'; # BM is eliminated post 5.9.5, but effectively is a specialisation of GV now. diff --git a/ext/B/B.xs b/ext/B/B.xs index aa02d54..caf2265 100644 --- a/ext/B/B.xs +++ b/ext/B/B.xs @@ -37,7 +37,7 @@ static const char* const svclassnames[] = { "B::BM", #endif #if PERL_VERSION >= 11 - "B::ORANGE", + "B::REGEXP", #endif #if PERL_VERSION >= 9 "B::GV", @@ -569,6 +569,9 @@ typedef SV *B__IV; typedef SV *B__PV; typedef SV *B__NV; typedef SV *B__PVMG; +#if PERL_VERSION >= 11 +typedef SV *B__REGEXP; +#endif typedef SV *B__PVLV; typedef SV *B__BM; typedef SV *B__RV; @@ -1503,6 +1506,31 @@ B::HV SvSTASH(sv) B::PVMG sv +MODULE = B PACKAGE = B::REGEXP + +#if PERL_VERSION >= 11 + +IV +REGEX(sv) + B::PVMG sv + CODE: + RETVAL = PTR2IV(((struct xregexp *)SvANY(sv))->xrx_regexp); + OUTPUT: + RETVAL + +SV* +precomp(sv) + B::PVMG sv + REGEXP* rx = NO_INIT + CODE: + rx = ((struct xregexp *)SvANY(sv))->xrx_regexp; + /* FIXME - UTF-8? And the equivalent precomp methods? */ + RETVAL = newSVpvn( rx->precomp, rx->prelen ); + OUTPUT: + RETVAL + +#endif + #define MgMOREMAGIC(mg) mg->mg_moremagic #define MgPRIVATE(mg) mg->mg_private #define MgTYPE(mg) mg->mg_type diff --git a/ext/B/t/b.t b/ext/B/t/b.t index 0a3f245..96d8ee6 100755 --- a/ext/B/t/b.t +++ b/ext/B/t/b.t @@ -74,8 +74,11 @@ ok( B::svref_2object(\$.)->MAGIC->TYPE eq "\0", '$. has \0 magic' ); '$. has no more magic' ); } -ok(B::svref_2object(qr/foo/)->MAGIC->precomp() eq 'foo', 'Get string from qr//'); -like(B::svref_2object(qr/foo/)->MAGIC->REGEX(), qr/\d+/, "REGEX() returns numeric value"); +my $r = qr/foo/; +my $obj = B::svref_2object($r); +my $regexp = ($] < 5.011) ? $obj->MAGIC : $obj; +ok($regexp->precomp() eq 'foo', 'Get string from qr//'); +like($regexp->REGEX(), qr/\d+/, "REGEX() returns numeric value"); my $iv = 1; my $iv_ref = B::svref_2object(\$iv); is(ref $iv_ref, "B::IV", "Test B:IV return from svref_2object"); diff --git a/ext/B/typemap b/ext/B/typemap index b94d2a6..7d14ba6 100644 --- a/ext/B/typemap +++ b/ext/B/typemap @@ -17,6 +17,7 @@ B::PV T_SV_OBJ B::IV T_SV_OBJ B::NV T_SV_OBJ B::PVMG T_SV_OBJ +B::REGEXP T_SV_OBJ B::PVLV T_SV_OBJ B::BM T_SV_OBJ B::RV T_SV_OBJ diff --git a/ext/Devel/Peek/t/Peek.t b/ext/Devel/Peek/t/Peek.t index 65937e7..5700a0b 100644 --- a/ext/Devel/Peek/t/Peek.t +++ b/ext/Devel/Peek/t/Peek.t @@ -282,19 +282,12 @@ do_test(15, REFCNT = 1 FLAGS = \\(ROK\\) RV = $ADDR - SV = ORANGE\\($ADDR\\) at $ADDR + SV = REGEXP\\($ADDR\\) at $ADDR REFCNT = 1 - FLAGS = \\(OBJECT,SMG\\) + FLAGS = \\(\\) IV = 0 NV = 0 - PV = 0 - MAGIC = $ADDR - MG_VIRTUAL = $ADDR - MG_TYPE = PERL_MAGIC_qr\(r\) - MG_OBJ = $ADDR - PAT = "\(\?-xism:tic\)" - REFCNT = 2 - STASH = $ADDR\\t"Regexp"'); + PV = 0'); } else { do_test(15, qr(tic), diff --git a/lib/overload.t b/lib/overload.t index fbaa4fd..50ec4a7 100644 --- a/lib/overload.t +++ b/lib/overload.t @@ -1125,7 +1125,7 @@ like ($@, qr/zap/); like(overload::StrVal(sub{1}), qr/^CODE\(0x[0-9a-f]+\)$/); like(overload::StrVal(\*GLOB), qr/^GLOB\(0x[0-9a-f]+\)$/); like(overload::StrVal(\$o), qr/^REF\(0x[0-9a-f]+\)$/); - like(overload::StrVal(qr/a/), qr/^Regexp=ORANGE\(0x[0-9a-f]+\)$/); + like(overload::StrVal(qr/a/), qr/^Regexp\(0x[0-9a-f]+\)$/); like(overload::StrVal($o), qr/^perl31793=ARRAY\(0x[0-9a-f]+\)$/); like(overload::StrVal($of), qr/^perl31793_fb=ARRAY\(0x[0-9a-f]+\)$/); like(overload::StrVal($no), qr/^no_overload=ARRAY\(0x[0-9a-f]+\)$/); diff --git a/pp_ctl.c b/pp_ctl.c index 64157f3..2ce3a97 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -77,8 +77,7 @@ PP(pp_regcomp) dSP; register PMOP *pm = (PMOP*)cLOGOP->op_other; SV *tmpstr; - MAGIC *mg = NULL; - regexp * re; + regexp *re = NULL; /* prevent recompiling under /o and ithreads. */ #if defined(USE_ITHREADS) @@ -117,11 +116,11 @@ PP(pp_regcomp) if (SvROK(tmpstr)) { SV * const sv = SvRV(tmpstr); - if(SvMAGICAL(sv)) - mg = mg_find(sv, PERL_MAGIC_qr); + if (SvTYPE(sv) == SVt_REGEXP) + re = ((struct xregexp *)SvANY(sv))->xrx_regexp; } - if (mg) { - regexp * const re = reg_temp_copy((regexp *)mg->mg_obj); + if (re) { + re = reg_temp_copy(re); ReREFCNT_dec(PM_GETRE(pm)); PM_SETRE(pm, re); } @@ -3890,7 +3889,6 @@ S_do_smartmatch(pTHX_ HV *seen_this, HV *seen_other) SV *e = TOPs; /* e is for 'expression' */ SV *d = TOPm1s; /* d is for 'default', as in PL_defgv */ SV *This, *Other; /* 'This' (and Other to match) to play with C++ */ - MAGIC *mg; regexp *this_regex, *other_regex; # define NOT_EMPTY_PROTO(cv) (!SvPOK(cv) || SvCUR(cv) == 0) @@ -3906,24 +3904,22 @@ S_do_smartmatch(pTHX_ HV *seen_this, HV *seen_other) && NOT_EMPTY_PROTO(This) && (Other = d))) # define SM_REGEX ( \ - (SvROK(d) && SvMAGICAL(This = SvRV(d)) \ - && (mg = mg_find(This, PERL_MAGIC_qr)) \ - && (this_regex = (regexp *)mg->mg_obj) \ + (SvROK(d) && (SvTYPE(This = SvRV(d)) == SVt_REGEXP) \ + && (this_regex = ((struct xregexp *)SvANY(This))->xrx_regexp) \ && (Other = e)) \ || \ - (SvROK(e) && SvMAGICAL(This = SvRV(e)) \ - && (mg = mg_find(This, PERL_MAGIC_qr)) \ - && (this_regex = (regexp *)mg->mg_obj) \ + (SvROK(e) && (SvTYPE(This = SvRV(e)) == SVt_REGEXP) \ + && (this_regex = ((struct xregexp *)SvANY(This))->xrx_regexp) \ && (Other = d)) ) # define SM_OTHER_REF(type) \ (SvROK(Other) && SvTYPE(SvRV(Other)) == SVt_##type) -# define SM_OTHER_REGEX (SvROK(Other) && SvMAGICAL(SvRV(Other)) \ - && (mg = mg_find(SvRV(Other), PERL_MAGIC_qr)) \ - && (other_regex = (regexp *)mg->mg_obj)) - +# define SM_OTHER_REGEX (SvROK(Other) \ + && (SvTYPE(SvRV(Other)) == SVt_REGEXP) \ + && (other_regex = ((struct xregexp *)SvANY(SvRV(Other)))->xrx_regexp)) + # define SM_SEEN_THIS(sv) hv_exists_ent(seen_this, \ sv_2mortal(newSViv(PTR2IV(sv))), 0) diff --git a/pp_hot.c b/pp_hot.c index 57540ca..21582b8 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1194,11 +1194,11 @@ PP(pp_qr) REGEXP * rx = PM_GETRE(pm); SV * const pkg = CALLREG_PACKAGE(rx); SV * const rv = sv_newmortal(); - SV * const sv = newSVrv(rv, SvPV_nolen(pkg)); + SV * const sv = newSVrv(rv, pkg ? SvPV_nolen(pkg) : NULL); if (rx->extflags & RXf_TAINTED) SvTAINTED_on(rv); - sv_upgrade(sv, SVt_ORANGE); - sv_magic(sv,(SV*)ReREFCNT_inc(rx), PERL_MAGIC_qr,0,0); + sv_upgrade(sv, SVt_REGEXP); + ((struct xregexp *)SvANY(sv))->xrx_regexp = ReREFCNT_inc(rx); XPUSHs(rv); RETURN; } diff --git a/regcomp.c b/regcomp.c index 5a175ba..90b94a3 100644 --- a/regcomp.c +++ b/regcomp.c @@ -5209,7 +5209,7 @@ SV* Perl_reg_qr_package(pTHX_ REGEXP * const rx) { PERL_UNUSED_ARG(rx); - return newSVpvs("Regexp"); + return NULL; } /* Scans the name of a named buffer from the pattern. diff --git a/regexec.c b/regexec.c index be159ed..af7a06a 100644 --- a/regexec.c +++ b/regexec.c @@ -3707,12 +3707,21 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) { /* extract RE object from returned value; compiling if * necessary */ - MAGIC *mg = NULL; - const SV *sv; - if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret))) - mg = mg_find(sv, PERL_MAGIC_qr); - else if (SvSMAGICAL(ret)) { + re = NULL; + + if (SvROK(ret)) { + const SV *const sv = SvRV(ret); + + if (SvTYPE(sv) == SVt_REGEXP) { + re = ((struct xregexp *)SvANY(sv))->xrx_regexp; + } else if (SvSMAGICAL(sv)) { + mg = mg_find(sv, PERL_MAGIC_qr); + assert(mg); + } + } else if (SvTYPE(ret) == SVt_REGEXP) { + re = ((struct xregexp *)SvANY(ret))->xrx_regexp; + } else if (SvSMAGICAL(ret)) { if (SvGMAGICAL(ret)) { /* I don't believe that there is ever qr magic here. */ @@ -3730,8 +3739,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) } if (mg) { - re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/ + re = (regexp *)mg->mg_obj; /*XXX:dmq*/ + assert(re); } + if (re) + re = reg_temp_copy(re); else { U32 pm_flags = 0; const I32 osize = PL_regsize; diff --git a/sv.c b/sv.c index 585685e..3e7c3ff 100644 --- a/sv.c +++ b/sv.c @@ -916,9 +916,10 @@ static const struct body_details bodies_by_type[] = { { sizeof(XPVMG), copy_length(XPVMG, xmg_stash), 0, SVt_PVMG, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(XPVMG)) }, - /* 28 */ - { sizeof(XPVMG), copy_length(XPVMG, xmg_stash), 0, SVt_ORANGE, FALSE, HADNV, - HASARENA, FIT_ARENA(0, sizeof(XPVMG)) }, + /* 32 */ + { sizeof(struct xregexp), copy_length(struct xregexp, xrx_regexp), 0, + SVt_REGEXP, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(struct xregexp)) + }, /* 48 */ { sizeof(XPVGV), sizeof(XPVGV), 0, SVt_PVGV, TRUE, HADNV, @@ -1310,7 +1311,7 @@ Perl_sv_upgrade(pTHX_ register SV *sv, svtype new_type) case SVt_PVGV: case SVt_PVCV: case SVt_PVLV: - case SVt_ORANGE: + case SVt_REGEXP: case SVt_PVMG: case SVt_PVNV: case SVt_PV: @@ -2692,22 +2693,20 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) STRLEN len; char *retval; char *buffer; - MAGIC *mg; const SV *const referent = (SV*)SvRV(sv); if (!referent) { len = 7; retval = buffer = savepvn("NULLREF", len); - } else if (SvTYPE(referent) == SVt_ORANGE - && ((SvFLAGS(referent) & - (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG)) - == (SVs_OBJECT|SVs_SMG)) - && (mg = mg_find(referent, PERL_MAGIC_qr))) - { + } else if (SvTYPE(referent) == SVt_REGEXP) { char *str = NULL; I32 haseval = 0; U32 flags = 0; - (str) = CALLREG_AS_STR(mg,lp,&flags,&haseval); + struct magic temp; + temp.mg_obj + = (SV*)((struct xregexp *)SvANY(referent))->xrx_regexp; + assert(temp.mg_obj); + (str) = CALLREG_AS_STR(&temp,lp,&flags,&haseval); if (flags & 1) SvUTF8_on(sv); else @@ -5206,6 +5205,9 @@ Perl_sv_clear(pTHX_ register SV *sv) Safefree(IoFMT_NAME(sv)); Safefree(IoBOTTOM_NAME(sv)); goto freescalar; + case SVt_REGEXP: + ReREFCNT_dec(((struct xregexp *)SvANY(sv))->xrx_regexp); + goto freescalar; case SVt_PVCV: case SVt_PVFM: cv_undef((CV*)sv); @@ -7771,7 +7773,7 @@ Perl_sv_reftype(pTHX_ const SV *sv, int ob) case SVt_PVFM: return "FORMAT"; case SVt_PVIO: return "IO"; case SVt_BIND: return "BIND"; - case SVt_ORANGE: return "ORANGE"; + case SVt_REGEXP: return "Regexp"; /* FIXME? to "REGEXP" */ default: return "UNKNOWN"; } } @@ -10121,7 +10123,7 @@ Perl_sv_dup(pTHX_ const SV *sstr, CLONE_PARAMS* param) case SVt_PVAV: case SVt_PVCV: case SVt_PVLV: - case SVt_ORANGE: + case SVt_REGEXP: case SVt_PVMG: case SVt_PVNV: case SVt_PVIV: @@ -10176,7 +10178,10 @@ Perl_sv_dup(pTHX_ const SV *sstr, CLONE_PARAMS* param) break; case SVt_PVMG: break; - case SVt_ORANGE: + case SVt_REGEXP: + ((struct xregexp *)SvANY(dstr))->xrx_regexp + = CALLREGDUPE(((struct xregexp *)SvANY(dstr))->xrx_regexp, + param); break; case SVt_PVLV: /* XXX LvTARGOFF sometimes holds PMOP* when DEBUGGING */ diff --git a/sv.h b/sv.h index e61b260..0d361d3 100644 --- a/sv.h +++ b/sv.h @@ -53,7 +53,7 @@ typedef enum { SVt_PVIV, /* 5 */ SVt_PVNV, /* 6 */ SVt_PVMG, /* 7 */ - SVt_ORANGE, /* 8 */ + SVt_REGEXP, /* 8 */ /* PVBM was here, before BIND replaced it. */ SVt_PVGV, /* 9 */ SVt_PVLV, /* 10 */ @@ -537,6 +537,37 @@ struct xpvmg { HV* xmg_stash; /* class package */ }; +struct xregexp { + union { + NV xnv_nv; /* numeric value, if any */ + HV * xgv_stash; + struct { + U32 xlow; + U32 xhigh; + } xpad_cop_seq; /* used by pad.c for cop_sequence */ + struct { + U32 xbm_previous; /* how many characters in string before rare? */ + U8 xbm_flags; + U8 xbm_rare; /* rarest character in string */ + } xbm_s; /* fields from PVBM */ + } xnv_u; + STRLEN xpv_cur; /* length of svu_pv as a C string */ + STRLEN xpv_len; /* allocated size */ + union { + IV xivu_iv; /* integer value or pv offset */ + UV xivu_uv; + void * xivu_p1; + I32 xivu_i32; + HEK * xivu_namehek; + } xiv_u; + union { + MAGIC* xmg_magic; /* linked list of magicalness */ + HV* xmg_ourstash; /* Stash for our (when SvPAD_OUR is true) */ + } xmg_u; + HV* xmg_stash; /* class package */ + REGEXP * xrx_regexp; /* Our regular expression */ +}; + struct xpvlv { union { NV xnv_nv; /* numeric value, if any */ diff --git a/t/op/qr.t b/t/op/qr.t index f8fc32f..ff9449e 100644 --- a/t/op/qr.t +++ b/t/op/qr.t @@ -6,15 +6,8 @@ BEGIN { require './test.pl'; } -plan tests => 2; +plan tests => 1; my $rx = qr//; is(ref $rx, "Regexp", "qr// blessed into `Regexp' by default"); - -# -# DESTROY doesn't do anything in the case of qr// except make sure -# that lookups for it don't end up in AUTOLOAD lookups. But make sure -# it's there anyway. -# -ok($rx->can("DESTROY"), "DESTROY method defined for Regexp"); diff --git a/universal.c b/universal.c index fa0ccd3..7fc2ad3 100644 --- a/universal.c +++ b/universal.c @@ -205,7 +205,6 @@ XS(XS_Internals_SvREADONLY); XS(XS_Internals_SvREFCNT); XS(XS_Internals_hv_clear_placehold); XS(XS_PerlIO_get_layers); -XS(XS_Regexp_DESTROY); XS(XS_Internals_hash_seed); XS(XS_Internals_rehash_seed); XS(XS_Internals_HvREHASH); @@ -269,7 +268,6 @@ Perl_boot_core_UNIVERSAL(pTHX) XS_Internals_hv_clear_placehold, file, "\\%"); newXSproto("PerlIO::get_layers", XS_PerlIO_get_layers, file, "*;@"); - newXS("Regexp::DESTROY", XS_Regexp_DESTROY, file); newXSproto("Internals::hash_seed",XS_Internals_hash_seed, file, ""); newXSproto("Internals::rehash_seed",XS_Internals_rehash_seed, file, ""); newXSproto("Internals::HvREHASH", XS_Internals_HvREHASH, file, "\\%"); @@ -881,12 +879,6 @@ XS(XS_Internals_hv_clear_placehold) } } -XS(XS_Regexp_DESTROY) -{ - PERL_UNUSED_CONTEXT; - PERL_UNUSED_ARG(cv); -} - XS(XS_PerlIO_get_layers) { dVAR; diff --git a/util.c b/util.c index 668ddc4..fef0393 100644 --- a/util.c +++ b/util.c @@ -5914,17 +5914,15 @@ Perl_my_dirfd(pTHX_ DIR * dir) { REGEXP * Perl_get_re_arg(pTHX_ SV *sv) { SV *tmpsv; - MAGIC *mg; if (sv) { if (SvMAGICAL(sv)) mg_get(sv); if (SvROK(sv) && (tmpsv = (SV*)SvRV(sv)) && /* assign deliberate */ - SvTYPE(tmpsv) == SVt_ORANGE && - (mg = mg_find(tmpsv, PERL_MAGIC_qr))) /* assign deliberate */ + SvTYPE(tmpsv) == SVt_REGEXP) { - return (REGEXP *)mg->mg_obj; + return ((struct xregexp *)SvANY(tmpsv))->xrx_regexp; } } -- 2.7.4