Forbid "\c{" and \c{non-ascii}

author Karl Williamson <public@khwilliamson.com>

Wed, 5 Feb 2014 21:54:47 +0000 (14:54 -0700)

committer Karl Williamson <public@khwilliamson.com>

Wed, 5 Feb 2014 22:47:05 +0000 (15:47 -0700)
author Karl Williamson <public@khwilliamson.com>
Wed, 5 Feb 2014 21:54:47 +0000 (14:54 -0700)
committer Karl Williamson <public@khwilliamson.com>
Wed, 5 Feb 2014 22:47:05 +0000 (15:47 -0700)
diff --git a/dquote_static.c b/dquote_static.c

index da1b5b9..76fb86d 100644 (file)
--- a/dquote_static.c
+++ b/dquote_static.c
@@ -46,31 +46,30 @@ S_regcurly(pTHX_ const char *s,
  */
  
  STATIC char
-S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
+S_grok_bslash_c(pTHX_ const char source, const bool output_warning)
  {
  
      U8 result;
  
-    if (utf8) {
-       /* Trying to deprecate non-ASCII usages.  This construct has never
-        * worked for a utf8 variant.  So, even though are accepting non-ASCII
-        * Latin1 in 5.14, no need to make them work under utf8 */
-       if (! isASCII(source)) {
-           Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII");
+    if (! isPRINT_A(source)) {
+        const char msg[] = "Character following \"\\c\" must be printable ASCII";
+        if (! isASCII(source)) {
+            Perl_croak(aTHX_ "%s", msg);
+        }
+        else if (output_warning) {  /* Unprintables can be removed in v5.22 */
+            Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), "%s",
+                                                                            msg);
         }
      }
+    else if (source == '{') {
+        assert(isPRINT_A(toCTRL('{')));
+        Perl_croak(aTHX_ "Use \"%c\" instead of \"\\c{\"", toCTRL('{'));
+    }
  
      result = toCTRL(source);
-    if (! isASCII(source)) {
-           Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
-                           "Character following \"\\c\" must be ASCII");
-    }
-    else if (! isCNTRL(result) && output_warning) {
-       if (source == '{') {
-           Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
-                           "\"\\c{\" is deprecated and is more clearly written as \";\"");
-       }
-       else {
+    if (output_warning && ! isCNTRL_L1(result)) {
+        /* We use isCNTRL_L1 above and not simply isCNTRL, because on EBCDIC
+         * machines, things like \cT map into a C1 control. */
             U8 clearer[3];
             U8 i = 0;
             if (! isWORDCHAR(result)) {
@@ -83,7 +82,6 @@ S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warn
                             "\"\\c%c\" is more clearly written simply as \"%s\"",
                             source,
                             clearer);
-       }
      }
  
      return result;
diff --git a/embed.fnc b/embed.fnc

index a166dc7..0d3ae6a 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -751,7 +751,7 @@ p   |OP*    |localize       |NN OP *o|I32 lex
  ApdR   |I32    |looks_like_number|NN SV *const sv
  Apd    |UV     |grok_bin       |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
  #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
-EMsR   |char   |grok_bslash_c  |const char source|const bool utf8|const bool output_warning
+EMsR   |char   |grok_bslash_c  |const char source|const bool output_warning
  EMsR   |bool   |grok_bslash_o  |NN char** s|NN UV* uv           \
                                 |NN const char** error_msg       \
                                 |const bool output_warning       \
diff --git a/embed.h b/embed.h

index d172248..269d972 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -980,7 +980,7 @@
  #  endif
  #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
  #define form_short_octal_warning(a,b)  S_form_short_octal_warning(aTHX_ a,b)
-#define grok_bslash_c(a,b,c)   S_grok_bslash_c(aTHX_ a,b,c)
+#define grok_bslash_c(a,b)     S_grok_bslash_c(aTHX_ a,b)
  #define grok_bslash_o(a,b,c,d,e,f,g)   S_grok_bslash_o(aTHX_ a,b,c,d,e,f,g)
  #define grok_bslash_x(a,b,c,d,e,f,g)   S_grok_bslash_x(aTHX_ a,b,c,d,e,f,g)
  #define regcurly(a,b)          S_regcurly(aTHX_ a,b)
diff --git a/pod/perldiag.pod b/pod/perldiag.pod

index 37acf2b..d913fa6 100644 (file)
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1387,14 +1387,6 @@ uses the character values modulus 256 instead, as if you had provided:
  
     unpack("s", "\x{f3}b")
  
-=item "\c{" is deprecated and is more clearly written as ";"
-
-(D deprecated, syntax) The C<\cI<X>> construct is intended to be a way
-to specify non-printable characters.  You used it with a "{" which
-evaluates to ";", which is printable.  It is planned to remove the
-ability to specify a semi-colon this way in Perl 5.20.  Just use a
-semi-colon or a backslash-semi-colon without the "\c".
-
  =item "\c%c" is more clearly written simply as "%s"
  
  (W syntax) The C<\cI<X>> construct is intended to be a way to specify
@@ -6170,6 +6162,13 @@ you can write it as C<push(@tied_array,())> to avoid this warning.
  (F) The "use" keyword is recognized and executed at compile time, and
  returns no useful value.  See L<perlmod>.
  
+=item Use "%c" instead of "\c{"
+
+(F) The C<\cI<X>> construct is intended to be a way to specify
+non-printable characters.  You used it with a C<"{"> which evaluates to
+C<";">, which is printable.  On ASCII platforms, just use a semi-colon
+or a backslash-semi-colon without the C<"\c">.
+
  =item Use of assignment to $[ is deprecated
  
  (D deprecated) The C<$[> variable (index of the first element in an array)
diff --git a/proto.h b/proto.h

index c4c54d1..3ff37f0 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -7038,7 +7038,7 @@ STATIC char*      S_form_short_octal_warning(pTHX_ const char * const s, const STRLEN
  #define PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING      \
         assert(s)
  
-STATIC char    S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
+STATIC char    S_grok_bslash_c(pTHX_ const char source, const bool output_warning)
                         __attribute__warn_unused_result__;
  
  STATIC bool    S_grok_bslash_o(pTHX_ char** s, UV* uv, const char** error_msg, const bool output_warning, const bool strict, const bool silence_non_portable, const bool utf8)
diff --git a/regcomp.c b/regcomp.c

index 0951fa5..919035d 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -11785,7 +11785,7 @@ tryagain:
                         }
                     case 'c':
                         p++;
-                       ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
+                       ender = grok_bslash_c(*p++, SIZE_ONLY);
                         break;
                      case '8': case '9': /* must be a backreference */
                          --p;
@@ -13528,7 +13528,7 @@ parseit:
                     goto recode_encoding;
                 break;
             case 'c':
-               value = grok_bslash_c(*RExC_parse++, UTF, SIZE_ONLY);
+               value = grok_bslash_c(*RExC_parse++, SIZE_ONLY);
                 break;
             case '0': case '1': case '2': case '3': case '4':
             case '5': case '6': case '7':
diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke

index 02c6ccc..5a38ba8 100644 (file)
--- a/t/lib/warnings/toke
+++ b/t/lib/warnings/toke
@@ -1361,19 +1361,21 @@ no
  ########
  # toke.c
  use warnings;
-my $a = "\c{ack}";
  $a = "\c,";
  $a = "\c`";
  no warnings 'syntax';
-$a = "\c{ack}";
  $a = "\c,";
  $a = "\c`";
-no warnings 'deprecated';
  EXPECT
-"\c{" is deprecated and is more clearly written as ";" at - line 3.
-"\c," is more clearly written simply as "l" at - line 4.
-"\c`" is more clearly written simply as "\ " at - line 5.
-"\c{" is deprecated and is more clearly written as ";" at - line 7.
+"\c," is more clearly written simply as "l" at - line 3.
+"\c`" is more clearly written simply as "\ " at - line 4.
+########
+# toke.c
+use warnings;
+my $a = "\c{ack}";
+EXPECT
+OPTION fatal
+Use ";" instead of "\c{" at - line 3.
  ########
  # toke.c
  use warnings 'syntax' ;
diff --git a/toke.c b/toke.c

index 290af07..766685d 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -3777,7 +3777,7 @@ S_scan_const(pTHX_ char *start)
             case 'c':
                 s++;
                 if (s < send) {
-                   *d++ = grok_bslash_c(*s++, has_utf8, 1);
+                   *d++ = grok_bslash_c(*s++, 1);
                 }
                 else {
                     yyerror("Missing control char name in \\c");
author	Karl Williamson <public@khwilliamson.com>
	Wed, 5 Feb 2014 21:54:47 +0000 (14:54 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Wed, 5 Feb 2014 22:47:05 +0000 (15:47 -0700)
dquote_static.c		patch | blob | history
embed.fnc		patch | blob | history
embed.h		patch | blob | history
pod/perldiag.pod		patch | blob | history
proto.h		patch | blob | history
regcomp.c		patch | blob | history
t/lib/warnings/toke		patch | blob | history
toke.c		patch | blob | history