From f6a766d53013594a5dc80a234f754c6b75c3b724 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 10 Apr 2011 16:21:55 -0600 Subject: [PATCH] PATCH: partial [perl #86972]: Allow /(?aia)/ This allows a second regex 'a' modifier in the infix form to not have to be contiguous with the first, and improves the message if there are extra modifiers. --- pod/perldiag.pod | 7 +++---- regcomp.c | 48 +++++++++++++++++++++++++++++++++++------------- t/re/re_tests | 7 +++++++ t/re/reg_mesg.t | 7 +++++-- 4 files changed, 50 insertions(+), 19 deletions(-) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 11ec27f..e9c5543 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4018,12 +4018,12 @@ expression compiler gave it. =item Regexp modifier "/%c" may not appear twice -(F syntax) The regular expression pattern had too many occurrences +(F syntax, regexp) The regular expression pattern had too many occurrences of the specified modifier. Remove the extraneous ones. =item Regexp modifiers "/%c" and "/%c" are mutually exclusive -(F syntax) The regular expression pattern had more than one of these +(F syntax, regexp) The regular expression pattern had more than one of these mutually exclusive modifiers. Retain only the modifier that is supposed to be there. @@ -4169,8 +4169,7 @@ where the problem was discovered. See L. discovered. This happens when using the C<(?^...)> construct to tell Perl to use the default regular expression modifiers, and you redundantly specify a default modifier; or having a modifier that can't -be turned off (such as C<"p"> or C<"l">) after a minus; or specifying -more than one of the C<"a>, C<"d">, C<"l">, or C<"u"> modifiers. For other +be turned off (such as C<"p"> or C<"l">) after a minus. For other causes, see L. =item Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ diff --git a/regcomp.c b/regcomp.c index ade999c..8cad5b1 100644 --- a/regcomp.c +++ b/regcomp.c @@ -7070,7 +7070,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) { U32 posflags = 0, negflags = 0; U32 *flagsp = &posflags; - bool has_charset_modifier = 0; + char has_charset_modifier = '\0'; regex_charset cs = (RExC_utf8 || RExC_uni_semantics) ? REGEX_UNICODE_CHARSET : REGEX_DEPENDS_CHARSET; @@ -7082,40 +7082,50 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) switch (*RExC_parse) { CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp); case LOCALE_PAT_MOD: - if (has_charset_modifier || flagsp == &negflags) { + if (has_charset_modifier) { + goto excess_modifier; + } + else if (flagsp == &negflags) { goto fail_modifiers; } cs = REGEX_LOCALE_CHARSET; - has_charset_modifier = 1; + has_charset_modifier = LOCALE_PAT_MOD; RExC_contains_locale = 1; break; case UNICODE_PAT_MOD: - if (has_charset_modifier || flagsp == &negflags) { + if (has_charset_modifier) { + goto excess_modifier; + } + else if (flagsp == &negflags) { goto fail_modifiers; } cs = REGEX_UNICODE_CHARSET; - has_charset_modifier = 1; + has_charset_modifier = UNICODE_PAT_MOD; break; case ASCII_RESTRICT_PAT_MOD: - if (has_charset_modifier || flagsp == &negflags) { + if (flagsp == &negflags) { goto fail_modifiers; } - if (*(RExC_parse + 1) == ASCII_RESTRICT_PAT_MOD) { + if (has_charset_modifier) { + if (cs != REGEX_ASCII_RESTRICTED_CHARSET) { + goto excess_modifier; + } /* Doubled modifier implies more restricted */ - cs = REGEX_ASCII_MORE_RESTRICTED_CHARSET; - RExC_parse++; - } + cs = REGEX_ASCII_MORE_RESTRICTED_CHARSET; + } else { cs = REGEX_ASCII_RESTRICTED_CHARSET; } - has_charset_modifier = 1; + has_charset_modifier = ASCII_RESTRICT_PAT_MOD; break; case DEPENDS_PAT_MOD: if (has_use_defaults - || has_charset_modifier || flagsp == &negflags) { goto fail_modifiers; + } + else if (has_charset_modifier) { + goto excess_modifier; } /* The dual charset means unicode semantics if the @@ -7125,8 +7135,20 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) cs = (RExC_utf8 || RExC_uni_semantics) ? REGEX_UNICODE_CHARSET : REGEX_DEPENDS_CHARSET; - has_charset_modifier = 1; + has_charset_modifier = DEPENDS_PAT_MOD; break; + excess_modifier: + RExC_parse++; + if (has_charset_modifier == ASCII_RESTRICT_PAT_MOD) { + vFAIL2("Regexp modifier \"/%c\" may appear a maximum of twice", ASCII_RESTRICT_PAT_MOD); + } + else if (has_charset_modifier == *(RExC_parse - 1)) { + vFAIL2("Regexp modifier \"/%c\" may not appear twice", *(RExC_parse - 1)); + } + else { + vFAIL3("Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", has_charset_modifier, *(RExC_parse - 1)); + } + /*NOTREACHED*/ case ONCE_PAT_MOD: /* 'o' */ case GLOBAL_PAT_MOD: /* 'g' */ if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { diff --git a/t/re/re_tests b/t/re/re_tests index af24677..9d5341b 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1510,4 +1510,11 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer ^m?(\D)(.*)\1$ 5b5 n - - ^m?(\d)(.*)\1$ 5b5 y $1 5 ^m?(\d)(.*)\1$ aba n - - + +# 17F is 'Long s'; This makes sure the a's in /aa can be separate +/s/ai \x{17F} y $& \x{17F} +/s/aia \x{17F} n - - +/s/aia S y $& S +/(?aia:s)/ \x{17F} n - - +/(?aia:s)/ S y $& S # vim: softtabstop=0 noexpandtab diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index 74b264a..827900f 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -64,8 +64,11 @@ my @death = '/(?^-i)foo/' => 'Sequence (?^-...) not recognized in regex; marked by {#} in m/(?^-{#}i)foo/', '/(?^d:foo)/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#}:foo)/', '/(?^d)foo/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#})foo/', - '/(?^lu:foo)/' => 'Sequence (?^lu...) not recognized in regex; marked by {#} in m/(?^lu{#}:foo)/', - '/(?^lu)foo/' => 'Sequence (?^lu...) not recognized in regex; marked by {#} in m/(?^lu{#})foo/', + '/(?^lu:foo)/' => 'Regexp modifiers "/l" and "/u" are mutually exclusive in regex; marked by {#} in m/(?^lu{#}:foo)/', + '/(?^lu)foo/' => 'Regexp modifiers "/l" and "/u" are mutually exclusive in regex; marked by {#} in m/(?^lu{#})foo/', +'/(?da:foo)/' => 'Regexp modifiers "/d" and "/a" are mutually exclusive in regex; marked by {#} in m/(?da{#}:foo)/', +'/(?lil:foo)/' => 'Regexp modifier "/l" may not appear twice in regex; marked by {#} in m/(?lil{#}:foo)/', +'/(?aaia:foo)/' => 'Regexp modifier "/a" may appear a maximum of twice in regex; marked by {#} in m/(?aaia{#}:foo)/', '/((x)/' => 'Unmatched ( in regex; marked by {#} in m/({#}(x)/', -- 2.7.4