From 421e43ba3017755892f18a5690b66a6ed8717fa9 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 5 Feb 2014 14:54:47 -0700 Subject: [PATCH] Forbid "\c{" and \c{non-ascii} These constructs have been deprecated since v5.14 with the intention of making them fatal in 5.18. This wasn't done then, so it is being done now. --- dquote_static.c | 34 ++++++++++++++++------------------ embed.fnc | 2 +- embed.h | 2 +- pod/perldiag.pod | 15 +++++++-------- proto.h | 2 +- regcomp.c | 4 ++-- t/lib/warnings/toke | 16 +++++++++------- toke.c | 2 +- 8 files changed, 38 insertions(+), 39 deletions(-) diff --git a/dquote_static.c b/dquote_static.c index da1b5b9..76fb86d 100644 --- a/dquote_static.c +++ b/dquote_static.c @@ -46,31 +46,30 @@ S_regcurly(pTHX_ const char *s, */ STATIC char -S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning) +S_grok_bslash_c(pTHX_ const char source, const bool output_warning) { U8 result; - if (utf8) { - /* Trying to deprecate non-ASCII usages. This construct has never - * worked for a utf8 variant. So, even though are accepting non-ASCII - * Latin1 in 5.14, no need to make them work under utf8 */ - if (! isASCII(source)) { - Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII"); + if (! isPRINT_A(source)) { + const char msg[] = "Character following \"\\c\" must be printable ASCII"; + if (! isASCII(source)) { + Perl_croak(aTHX_ "%s", msg); + } + else if (output_warning) { /* Unprintables can be removed in v5.22 */ + Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), "%s", + msg); } } + else if (source == '{') { + assert(isPRINT_A(toCTRL('{'))); + Perl_croak(aTHX_ "Use \"%c\" instead of \"\\c{\"", toCTRL('{')); + } result = toCTRL(source); - if (! isASCII(source)) { - Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), - "Character following \"\\c\" must be ASCII"); - } - else if (! 
isCNTRL(result) && output_warning) { - if (source == '{') { - Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), - "\"\\c{\" is deprecated and is more clearly written as \";\""); - } - else { + if (output_warning && ! isCNTRL_L1(result)) { + /* We use isCNTRL_L1 above and not simply isCNTRL, because on EBCDIC + * machines, things like \cT map into a C1 control. */ U8 clearer[3]; U8 i = 0; if (! isWORDCHAR(result)) { @@ -83,7 +82,6 @@ S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warn "\"\\c%c\" is more clearly written simply as \"%s\"", source, clearer); - } } return result; diff --git a/embed.fnc b/embed.fnc index a166dc7..0d3ae6a 100644 --- a/embed.fnc +++ b/embed.fnc @@ -751,7 +751,7 @@ p |OP* |localize |NN OP *o|I32 lex ApdR |I32 |looks_like_number|NN SV *const sv Apd |UV |grok_bin |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) -EMsR |char |grok_bslash_c |const char source|const bool utf8|const bool output_warning +EMsR |char |grok_bslash_c |const char source|const bool output_warning EMsR |bool |grok_bslash_o |NN char** s|NN UV* uv \ |NN const char** error_msg \ |const bool output_warning \ diff --git a/embed.h b/embed.h index d172248..269d972 100644 --- a/embed.h +++ b/embed.h @@ -980,7 +980,7 @@ # endif # if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) #define form_short_octal_warning(a,b) S_form_short_octal_warning(aTHX_ a,b) -#define grok_bslash_c(a,b,c) S_grok_bslash_c(aTHX_ a,b,c) +#define grok_bslash_c(a,b) S_grok_bslash_c(aTHX_ a,b) #define grok_bslash_o(a,b,c,d,e,f,g) S_grok_bslash_o(aTHX_ a,b,c,d,e,f,g) #define grok_bslash_x(a,b,c,d,e,f,g) S_grok_bslash_x(aTHX_ a,b,c,d,e,f,g) #define regcurly(a,b) S_regcurly(aTHX_ a,b) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 37acf2b..d913fa6 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1387,14 +1387,6 @@ uses the character values modulus 256 instead, as if 
you had provided: unpack("s", "\x{f3}b") -=item "\c{" is deprecated and is more clearly written as ";" - -(D deprecated, syntax) The C<\cI<X>> construct is intended to be a way -to specify non-printable characters. You used it with a "{" which -evaluates to ";", which is printable. It is planned to remove the -ability to specify a semi-colon this way in Perl 5.20. Just use a -semi-colon or a backslash-semi-colon without the "\c". - =item "\c%c" is more clearly written simply as "%s" (W syntax) The C<\cI<X>> construct is intended to be a way to specify @@ -6170,6 +6162,13 @@ you can write it as C<push(@tied_array,())> to avoid this warning. (F) The "use" keyword is recognized and executed at compile time, and returns no useful value. See L<perlmod>. +=item Use "%c" instead of "\c{" + +(F) The C<\cI<X>> construct is intended to be a way to specify +non-printable characters. You used it with a C<"{"> which evaluates to +C<";">, which is printable. On ASCII platforms, just use a semi-colon +or a backslash-semi-colon without the C<"\c">. 
+ =item Use of assignment to $[ is deprecated (D deprecated) The C<$[> variable (index of the first element in an array) diff --git a/proto.h b/proto.h index c4c54d1..3ff37f0 100644 --- a/proto.h +++ b/proto.h @@ -7038,7 +7038,7 @@ STATIC char* S_form_short_octal_warning(pTHX_ const char * const s, const STRLEN #define PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING \ assert(s) -STATIC char S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning) +STATIC char S_grok_bslash_c(pTHX_ const char source, const bool output_warning) __attribute__warn_unused_result__; STATIC bool S_grok_bslash_o(pTHX_ char** s, UV* uv, const char** error_msg, const bool output_warning, const bool strict, const bool silence_non_portable, const bool utf8) diff --git a/regcomp.c b/regcomp.c index 0951fa5..919035d 100644 --- a/regcomp.c +++ b/regcomp.c @@ -11785,7 +11785,7 @@ tryagain: } case 'c': p++; - ender = grok_bslash_c(*p++, UTF, SIZE_ONLY); + ender = grok_bslash_c(*p++, SIZE_ONLY); break; case '8': case '9': /* must be a backreference */ --p; @@ -13528,7 +13528,7 @@ parseit: goto recode_encoding; break; case 'c': - value = grok_bslash_c(*RExC_parse++, UTF, SIZE_ONLY); + value = grok_bslash_c(*RExC_parse++, SIZE_ONLY); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index 02c6ccc..5a38ba8 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -1361,19 +1361,21 @@ no ######## # toke.c use warnings; -my $a = "\c{ack}"; $a = "\c,"; $a = "\c`"; no warnings 'syntax'; -$a = "\c{ack}"; $a = "\c,"; $a = "\c`"; -no warnings 'deprecated'; EXPECT -"\c{" is deprecated and is more clearly written as ";" at - line 3. -"\c," is more clearly written simply as "l" at - line 4. -"\c`" is more clearly written simply as "\ " at - line 5. -"\c{" is deprecated and is more clearly written as ";" at - line 7. +"\c," is more clearly written simply as "l" at - line 3. 
+"\c`" is more clearly written simply as "\ " at - line 4. +######## +# toke.c +use warnings; +my $a = "\c{ack}"; +EXPECT +OPTION fatal +Use ";" instead of "\c{" at - line 3. ######## # toke.c use warnings 'syntax' ; diff --git a/toke.c b/toke.c index 290af07..766685d 100644 --- a/toke.c +++ b/toke.c @@ -3777,7 +3777,7 @@ S_scan_const(pTHX_ char *start) case 'c': s++; if (s < send) { - *d++ = grok_bslash_c(*s++, has_utf8, 1); + *d++ = grok_bslash_c(*s++, 1); } else { yyerror("Missing control char name in \\c"); -- 2.7.4