From 8316ead628e0766f9c676352d8fa10eddbede091 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 10 Jan 2013 15:03:39 -0700 Subject: [PATCH] regcomp.c: Add parameter to regclass() This parameter allows the caller to specify whether multi-character folds should be allowed or not. In general they should be; in the one case where this commit says they shouldn't, they are never returned from Unicode properties anyway. This capability will be put to real use by future commits. --- embed.fnc | 3 ++- embed.h | 2 +- proto.h | 2 +- regcomp.c | 12 ++++++++---- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/embed.fnc b/embed.fnc index e6bb9bc..4d1d81f 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1960,7 +1960,8 @@ Es |regnode*|regbranch |NN struct RExC_state_t *pRExC_state \ Es |STRLEN |reguni |NN const struct RExC_state_t *pRExC_state \ |UV uv|NN char *s Es |regnode*|regclass |NN struct RExC_state_t *pRExC_state \ - |NN I32 *flagp|U32 depth|const bool stop_at_1 + |NN I32 *flagp|U32 depth|const bool stop_at_1 \ + |bool allow_multi_fold Es |regnode*|reg_node |NN struct RExC_state_t *pRExC_state|U8 op Es |UV |reg_recode |const char value|NN SV **encp Es |regnode*|regpiece |NN struct RExC_state_t *pRExC_state \ diff --git a/embed.h b/embed.h index 86d9006..3f4034e 100644 --- a/embed.h +++ b/embed.h @@ -935,7 +935,7 @@ #define reganode(a,b,c) S_reganode(aTHX_ a,b,c) #define regatom(a,b,c) S_regatom(aTHX_ a,b,c) #define regbranch(a,b,c,d) S_regbranch(aTHX_ a,b,c,d) -#define regclass(a,b,c,d) S_regclass(aTHX_ a,b,c,d) +#define regclass(a,b,c,d,e) S_regclass(aTHX_ a,b,c,d,e) #define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d) #define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c) #define regpposixcc(a,b,c) S_regpposixcc(aTHX_ a,b,c) diff --git a/proto.h b/proto.h index 40784c2..9bb1228 100644 --- a/proto.h +++ b/proto.h @@ -6639,7 +6639,7 @@ STATIC regnode* S_regbranch(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, #define PERL_ARGS_ASSERT_REGBRANCH \ 
assert(pRExC_state); assert(flagp) -STATIC regnode* S_regclass(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1) +STATIC regnode* S_regclass(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2); #define PERL_ARGS_ASSERT_REGCLASS \ diff --git a/regcomp.c b/regcomp.c index 1d411cb..5c52f77 100644 --- a/regcomp.c +++ b/regcomp.c @@ -10106,7 +10106,8 @@ tryagain: { char * const oregcomp_parse = ++RExC_parse; ret = regclass(pRExC_state, flagp,depth+1, - FALSE /* means parse the whole char class */ ); + FALSE, /* means parse the whole char class */ + TRUE); /* allow multi-char folds */ if (*RExC_parse != ']') { RExC_parse = oregcomp_parse; vFAIL("Unmatched ["); @@ -10301,7 +10302,8 @@ tryagain: RExC_parse--; ret = regclass(pRExC_state, flagp,depth+1, - TRUE /* means just parse this element */ ); + TRUE, /* means just parse this element */ + FALSE); /* don't allow multi-char folds */ RExC_parse--; @@ -11229,7 +11231,8 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me) #define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len) STATIC regnode * -S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1) +S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, + const bool stop_at_1, bool allow_multi_folds) { /* parse a bracketed class specification. Most of these will produce an ANYOF node; * but something like [a] will produce an EXACT node; [aA], an EXACTFish @@ -11325,6 +11328,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool st if (UCHARAT(RExC_parse) == '^') { /* Complement of range. */ RExC_parse++; invert = TRUE; + allow_multi_folds = FALSE; RExC_naughty++; } @@ -12009,7 +12013,7 @@ parseit: * "ss" =~ /^[^\xDF]+$/i => N * * See [perl #89750] */ - if (FOLD && ! 
invert && value == prevvalue) { + if (FOLD && allow_multi_folds && value == prevvalue) { if (value == LATIN_SMALL_LETTER_SHARP_S || (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold, value))) -- 2.7.4