From 822592eef0bdbb064d827b46a91e7bb392714f52 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 10 Jan 2013 15:47:24 -0700 Subject: [PATCH] regcomp.c: Add capability for strict [:posix:] This adds a parameter to regpposixcc() to enforce stricter rules on the posix class syntax. It is currently unused --- embed.fnc | 2 +- embed.h | 2 +- proto.h | 2 +- regcomp.c | 23 ++++++++++++++++++++--- t/porting/diag.t | 2 ++ 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/embed.fnc b/embed.fnc index f51e37c..88a2444 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2014,7 +2014,7 @@ EsRn |U32 |add_data |NN struct RExC_state_t *pRExC_state|U32 n \ |NN const char *s rs |void |re_croak2 |NN const char* pat1|NN const char* pat2|... Ei |I32 |regpposixcc |NN struct RExC_state_t *pRExC_state \ - |I32 value|NULLOK SV *free_me + |I32 value|NULLOK SV *free_me|const bool strict Es |I32 |make_trie |NN struct RExC_state_t *pRExC_state \ |NN regnode *startbranch|NN regnode *first \ |NN regnode *last|NN regnode *tail \ diff --git a/embed.h b/embed.h index 57e4219..eff78d4 100644 --- a/embed.h +++ b/embed.h @@ -939,7 +939,7 @@ #define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d) #define regpatws S_regpatws #define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c) -#define regpposixcc(a,b,c) S_regpposixcc(aTHX_ a,b,c) +#define regpposixcc(a,b,c,d) S_regpposixcc(aTHX_ a,b,c,d) #define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d) #define reguni(a,b,c) S_reguni(aTHX_ a,b,c) #define regwhite S_regwhite diff --git a/proto.h b/proto.h index ae55c51..6816e56 100644 --- a/proto.h +++ b/proto.h @@ -6664,7 +6664,7 @@ STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U #define PERL_ARGS_ASSERT_REGPIECE \ assert(pRExC_state); assert(flagp) -PERL_STATIC_INLINE I32 S_regpposixcc(pTHX_ struct RExC_state_t *pRExC_state, I32 value, SV *free_me) +PERL_STATIC_INLINE I32 S_regpposixcc(pTHX_ struct RExC_state_t *pRExC_state, I32 value, SV *free_me, const bool strict) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_REGPPOSIXCC \ assert(pRExC_state) diff --git a/regcomp.c b/regcomp.c index 36db53d..06d7950 100644 --- a/regcomp.c +++ b/regcomp.c @@ -11138,7 +11138,8 @@ S_regpatws( RExC_state_t *pRExC_state, char *p , const bool recognize_comment ) #define POSIXCC(c) (POSIXCC_DONE(c) || POSIXCC_NOTYET(c)) PERL_STATIC_INLINE I32 -S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me) +S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me, + const bool strict) { dVAR; I32 namedclass = OOB_NAMEDCLASS; @@ -11154,9 +11155,20 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me) while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c) RExC_parse++; - if (RExC_parse == RExC_end) + if (RExC_parse == RExC_end) { + if (strict) { + + /* Try to give a better location for the error (than the end of + * the string) by looking for the matching ']' */ + RExC_parse = s; + while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') { + RExC_parse++; + } + vFAIL2("Unmatched '%c' in POSIX class", c); + } /* Grandfather lone [:, [=, [. */ RExC_parse = s; + } else { const char* const t = RExC_parse++; /* skip over the c */ assert(*t == c); @@ -11257,6 +11269,11 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me) } else { /* Maternal grandfather: * "[:" ending in ":" but not in ":]" */ + if (strict) { + vFAIL("Unmatched '[' in POSIX class"); + } + + /* Grandfather lone [:, [=, [. */ RExC_parse = s; } } @@ -11444,7 +11461,7 @@ parseit: && RExC_parse < RExC_end && POSIXCC(UCHARAT(RExC_parse))) { - namedclass = regpposixcc(pRExC_state, value, listsv); + namedclass = regpposixcc(pRExC_state, value, listsv, FALSE); } else if (value == '\\') { if (UTF) { diff --git a/t/porting/diag.t b/t/porting/diag.t index 3901f2a..8657e97 100644 --- a/t/porting/diag.t +++ b/t/porting/diag.t @@ -629,6 +629,8 @@ Wrong syntax (suid) fd script name "%s" Useless (%s%c) - %suse /%c modifier in regex; marked by <-- HERE in m/%s/ Useless (%sc) - %suse /gc modifier in regex; marked by <-- HERE in m/%s/ Useless use of (?-p) in regex; marked by <-- HERE in m/%s/ +Unmatched '%c' in POSIX class in regex; marked by <-- HERE in m/%s/ +Unmatched '[' in POSIX class in regex; marked by <-- HERE in m/%s/ __CATEGORIES__ Code point 0x%X is not Unicode, all \p{} matches fail; all \P{} matches succeed -- 2.7.4