From e357fc671749d483a102a060a774d41a9b256b46 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Tue, 9 Jan 2007 01:46:31 +0100 Subject: [PATCH] Let the regex parser decide if we have a special pattern or not. Message-ID: <9b18b3110701081546n2c3f19acy29adc0d233bb848c@mail.gmail.com> p4raw-id: //depot/perl@29730 --- op.c | 4 +++- op.h | 8 +++++--- pp.c | 2 +- pp_ctl.c | 2 +- regcomp.c | 5 +++++ regexp.h | 30 ++++++++++++++++++------------ 6 files changed, 33 insertions(+), 18 deletions(-) diff --git a/op.c b/op.c index a495db8..df2e90b 100644 --- a/op.c +++ b/op.c @@ -3256,8 +3256,10 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg) pm->op_pmdynflags |= PMdf_UTF8; /* FIXME - can we make this function take const char * args? */ PM_SETRE(pm, CALLREGCOMP((char*)p, (char*)p + plen, pm)); - if (strEQ("\\s+", PM_GETRE(pm)->precomp)) + if (PM_GETRE(pm)->extflags & RXf_WHITE) pm->op_pmflags |= PMf_WHITE; + else + pm->op_pmflags &= ~PMf_WHITE; #ifdef PERL_MAD op_getmad(expr,(OP*)pm,'e'); #else diff --git a/op.h b/op.h index f84f123..1ac4aa0 100644 --- a/op.h +++ b/op.h @@ -367,15 +367,17 @@ struct pmop { #define PMf_GLOBAL 0x0100 /* pattern had a g modifier */ #define PMf_CONTINUE 0x0200 /* don't reset pos() if //g fails */ #define PMf_EVAL 0x0400 /* evaluating replacement as expr */ + +/* The following flags have exact equivalents in regcomp.h with the prefix RXf_ + * which are stored in the regexp->extflags member. 
+ */ #define PMf_LOCALE 0x0800 /* use locale for character types */ #define PMf_MULTILINE 0x1000 /* assume multiple lines */ #define PMf_SINGLELINE 0x2000 /* assume single line */ #define PMf_FOLD 0x4000 /* case insensitivity */ #define PMf_EXTENDED 0x8000 /* chuck embedded whitespace */ -/* mask of bits stored in regexp->extflags - these all are also called RXf_PMf_xyz - */ +/* mask of bits that need to be transferred to re->extflags */ #define PMf_COMPILETIME (PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED) #ifdef USE_ITHREADS diff --git a/pp.c b/pp.c index 4505890..4523584 100644 --- a/pp.c +++ b/pp.c @@ -4629,7 +4629,7 @@ PP(pp_split) ++s; } } - else if (rx->precomp[0] == '^' && rx->precomp[1] == '\0') { + else if (rx->extflags & RXf_START_ONLY) { while (--limit) { for (m = s; m < strend && *m != '\n'; m++) ; diff --git a/pp_ctl.c b/pp_ctl.c index 8506daa..b439c3e 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -177,7 +177,7 @@ PP(pp_regcomp) if (!PM_GETRE(pm)->prelen && PL_curpm) pm = PL_curpm; - else if (strEQ("\\s+", PM_GETRE(pm)->precomp)) + else if (PM_GETRE(pm)->extflags & RXf_WHITE) pm->op_pmflags |= PMf_WHITE; else pm->op_pmflags &= ~PMf_WHITE; diff --git a/regcomp.c b/regcomp.c index 8772237..c1c141a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4582,6 +4582,11 @@ reStudy: r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names); else r->paren_names = NULL; + if (r->prelen == 3 && strEQ("\\s+", r->precomp)) + r->extflags |= RXf_WHITE; + else if (r->prelen == 1 && r->precomp[0] == '^') + r->extflags |= RXf_START_ONLY; + #ifdef DEBUGGING if (RExC_paren_names) { ri->name_list_idx = add_data( pRExC_state, 1, "p" ); diff --git a/regexp.h b/regexp.h index f7c9833..d02b321 100644 --- a/regexp.h +++ b/regexp.h @@ -116,6 +116,10 @@ typedef struct regexp_engine { /* Flags stored in regexp->extflags * These are used by code external to the regexp engine + * + * Note that flags starting with RXf_PMf_ have exact equivalents + * stored in op_pmflags and which are 
defined in op.h, they are defined + * numerically here only for clarity. */ /* Anchor and GPOS related stuff */ @@ -125,20 +129,22 @@ typedef struct regexp_engine { #define RXf_ANCH_GPOS 0x00000008 #define RXf_GPOS_SEEN 0x00000010 #define RXf_GPOS_FLOAT 0x00000020 -/* five bits here */ +/* three bits here */ #define RXf_ANCH (RXf_ANCH_BOL|RXf_ANCH_MBOL|RXf_ANCH_GPOS|RXf_ANCH_SBOL) #define RXf_GPOS_CHECK (RXf_GPOS_SEEN|RXf_ANCH_GPOS) -#define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS) -/* - * 0xF800 of extflags is used by PMf_COMPILETIME - * These are the regex equivelent of the PMf_xyz stuff defined - * in op.h - */ -#define RXf_PMf_LOCALE 0x00000800 -#define RXf_PMf_MULTILINE 0x00001000 -#define RXf_PMf_SINGLELINE 0x00002000 -#define RXf_PMf_FOLD 0x00004000 -#define RXf_PMf_EXTENDED 0x00008000 +#define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS) + +/* Flags indicating special patterns */ +#define RXf_START_ONLY 0x00000200 /* Pattern is /^/ */ +#define RXf_WHITE 0x00000400 /* Pattern is /\s+/ */ + +/* 0xF800 of extflags is used by (RXf_)PMf_COMPILETIME */ +#define RXf_PMf_LOCALE 0x00000800 /* use locale */ +#define RXf_PMf_MULTILINE 0x00001000 /* /m */ +#define RXf_PMf_SINGLELINE 0x00002000 /* /s */ +#define RXf_PMf_FOLD 0x00004000 /* /i */ +#define RXf_PMf_EXTENDED 0x00008000 /* /x */ +/* these flags are transferred from the PMOP->op_pmflags member during compilation */ #define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED) /* What we have seen */ -- 2.7.4