From c8756f30ff24381844a7b05f062074a87dc23910 Mon Sep 17 00:00:00 2001 From: Andreas Karrer Date: Wed, 11 Jun 1997 12:00:00 +1200 Subject: [PATCH] Regex Bug in 5.003_26 thru 003_99a According to Tom Christiansen: > I just *knew* I should have included stuff from my FMTEYEWTK on regexen > in the regression suites. You'll note that you've now rendered p71 of > the Camel a liar. This is considered Very Naughty. *sigh* Well, without a bug report, what I could I do?... > I'd say this is a must-fix. Here's a patch. I suppose this should go into _01. p5p-msgid: 199705161915.PAA18721@rio.atlantic.net Signed-off-by: Chip Salzenberg --- regcomp.c | 10 +++++++++- regcomp.h | 46 +++++++++++++++++++++++++--------------------- regexec.c | 15 +++++++++++++-- 3 files changed, 47 insertions(+), 24 deletions(-) diff --git a/regcomp.c b/regcomp.c index d3788c8..0e9846c 100644 --- a/regcomp.c +++ b/regcomp.c @@ -903,7 +903,9 @@ tryagain: goto defchar; else { regsawback = 1; - ret = reganode(REF, num); + ret = reganode((regflags & PMf_FOLD) + ? ((regflags & PMf_LOCALE) ? REFFL : REFF) + : REF, num); *flagp |= HASWIDTH; while (isDIGIT(*regparse)) regparse++; @@ -1667,6 +1669,12 @@ char *op; case REF: sv_catpvf(sv, "REF%d", ARG1(op)); break; + case REFF: + sv_catpvf(sv, "REFF%d", ARG1(op)); + break; + case REFFL: + sv_catpvf(sv, "REFFL%d", ARG1(op)); + break; case OPEN: sv_catpvf(sv, "OPEN%d", ARG1(op)); break; diff --git a/regcomp.h b/regcomp.h index d618066..5915086 100644 --- a/regcomp.h +++ b/regcomp.h @@ -72,25 +72,27 @@ #define BOUNDL 21 /* no Match "" at any word boundary */ #define NBOUND 22 /* no Match "" at any word non-boundary */ #define NBOUNDL 23 /* no Match "" at any word non-boundary */ -#define REF 24 /* num Match some already matched string */ -#define OPEN 25 /* num Mark this point in input as start of #n. */ -#define CLOSE 26 /* num Analogous to OPEN. */ -#define MINMOD 27 /* no Next operator is not greedy. */ -#define GPOS 28 /* no Matches where last m//g left off. */ -#define IFMATCH 29 /* no Succeeds if the following matches. */ -#define UNLESSM 30 /* no Fails if the following matches. */ -#define SUCCEED 31 /* no Return from a subroutine, basically. */ -#define WHILEM 32 /* no Do curly processing and see if rest matches. */ -#define ALNUM 33 /* no Match any alphanumeric character */ -#define ALNUML 34 /* no Match any alphanumeric char in locale */ -#define NALNUM 35 /* no Match any non-alphanumeric character */ -#define NALNUML 36 /* no Match any non-alphanumeric char in locale */ -#define SPACE 37 /* no Match any whitespace character */ -#define SPACEL 38 /* no Match any whitespace char in locale */ -#define NSPACE 39 /* no Match any non-whitespace character */ -#define NSPACEL 40 /* no Match any non-whitespace char in locale */ -#define DIGIT 41 /* no Match any numeric character */ -#define NDIGIT 42 /* no Match any non-numeric character */ +#define REF 24 /* num Match already matched string */ +#define REFF 25 /* num Match already matched string, folded */ +#define REFFL 26 /* num Match already matched string, folded in loc. */ +#define OPEN 27 /* num Mark this point in input as start of #n. */ +#define CLOSE 28 /* num Analogous to OPEN. */ +#define MINMOD 29 /* no Next operator is not greedy. */ +#define GPOS 30 /* no Matches where last m//g left off. */ +#define IFMATCH 31 /* no Succeeds if the following matches. */ +#define UNLESSM 32 /* no Fails if the following matches. */ +#define SUCCEED 33 /* no Return from a subroutine, basically. */ +#define WHILEM 34 /* no Do curly processing and see if rest matches. */ +#define ALNUM 35 /* no Match any alphanumeric character */ +#define ALNUML 36 /* no Match any alphanumeric char in locale */ +#define NALNUM 37 /* no Match any non-alphanumeric character */ +#define NALNUML 38 /* no Match any non-alphanumeric char in locale */ +#define SPACE 39 /* no Match any whitespace character */ +#define SPACEL 40 /* no Match any whitespace char in locale */ +#define NSPACE 41 /* no Match any non-whitespace character */ +#define NSPACEL 42 /* no Match any non-whitespace char in locale */ +#define DIGIT 43 /* no Match any numeric character */ +#define NDIGIT 44 /* no Match any non-numeric character */ /* * Opcode notes: @@ -121,7 +123,7 @@ EXT char regarglen[] = { 0,0,0,0,0,0,0,0,0,0, /*CURLY*/ 4, /*CURLYX*/ 4, 0,0,0,0,0,0,0,0,0,0,0,0, - /*REF*/ 2, /*OPEN*/ 2, /*CLOSE*/ 2, + /*REF*/ 2, 2, 2, /*OPEN*/ 2, /*CLOSE*/ 2, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; #endif @@ -155,6 +157,8 @@ EXT char regkind[] = { NBOUND, NBOUND, REF, + REF, + REF, OPEN, CLOSE, MINMOD, @@ -181,7 +185,7 @@ EXT char regkind[] = { EXT char varies[]; #else EXT char varies[] = { - BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, WHILEM, 0 + BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL, WHILEM, 0 }; #endif diff --git a/regexec.c b/regexec.c index 630b130..7f60a91 100644 --- a/regexec.c +++ b/regexec.c @@ -825,7 +825,11 @@ char *prog; sayNO; nextchar = UCHARAT(++locinput); break; + case REFFL: + regtainted = TRUE; + /* FALL THROUGH */ case REF: + case REFF: n = ARG1(scan); /* which paren pair */ s = regstartp[n]; if (!s) @@ -835,12 +839,19 @@ char *prog; if (s == regendp[n]) break; /* Inline the first character, for speed. */ - if (UCHARAT(s) != nextchar) + if (UCHARAT(s) != nextchar && + (OP(scan) == REF || + (UCHARAT(s) != ((OP(scan) == REFF + ? fold : fold_locale)[nextchar])))) sayNO; ln = regendp[n] - s; if (locinput + ln > regeol) sayNO; - if (ln > 1 && memNE(s, locinput, ln)) + if (ln > 1 && (OP(scan) == REF + ? memNE(s, locinput, ln) + : (OP(scan) == REFF + ? ibcmp(s, locinput, ln) + : ibcmp_locale(s, locinput, ln)))) sayNO; locinput += ln; nextchar = UCHARAT(locinput); -- 2.7.4