From 421e43ba3017755892f18a5690b66a6ed8717fa9 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Wed, 5 Feb 2014 14:54:47 -0700
Subject: [PATCH] Forbid "\c{" and \c{non-ascii}

These constructs have been deprecated since v5.14 with the intention of
making them fatal in 5.18.  That wasn't done then, so it is being done now.
---
 dquote_static.c     | 34 ++++++++++++++++------------------
 embed.fnc           |  2 +-
 embed.h             |  2 +-
 pod/perldiag.pod    | 15 +++++++--------
 proto.h             |  2 +-
 regcomp.c           |  4 ++--
 t/lib/warnings/toke | 16 +++++++++-------
 toke.c              |  2 +-
 8 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/dquote_static.c b/dquote_static.c
index da1b5b9..76fb86d 100644
--- a/dquote_static.c
+++ b/dquote_static.c
@@ -46,31 +46,30 @@ S_regcurly(pTHX_ const char *s,
 */
 
 STATIC char
-S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
+S_grok_bslash_c(pTHX_ const char source, const bool output_warning)
 {
 
     U8 result;
 
-    if (utf8) {
-	/* Trying to deprecate non-ASCII usages.  This construct has never
-	 * worked for a utf8 variant.  So, even though are accepting non-ASCII
-	 * Latin1 in 5.14, no need to make them work under utf8 */
-	if (! isASCII(source)) {
-	    Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII");
+    if (! isPRINT_A(source)) {
+        const char msg[] = "Character following \"\\c\" must be printable ASCII";
+        if (! isASCII(source)) {
+            Perl_croak(aTHX_ "%s", msg);
+        }
+        else if (output_warning) {  /* Unprintables can be removed in v5.22 */
+            Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), "%s",
+                                                                            msg);
 	}
     }
+    else if (source == '{') {
+        assert(isPRINT_A(toCTRL('{')));
+        Perl_croak(aTHX_ "Use \"%c\" instead of \"\\c{\"", toCTRL('{'));
+    }
 
     result = toCTRL(source);
-    if (! isASCII(source)) {
-	    Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
-			    "Character following \"\\c\" must be ASCII");
-    }
-    else if (! isCNTRL(result) && output_warning) {
-	if (source == '{') {
-	    Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
-			    "\"\\c{\" is deprecated and is more clearly written as \";\"");
-	}
-	else {
+    if (output_warning && ! isCNTRL_L1(result)) {
+        /* We use isCNTRL_L1 above and not simply isCNTRL, because on EBCDIC
+         * machines, things like \cT map into a C1 control. */
 	    U8 clearer[3];
 	    U8 i = 0;
 	    if (! isWORDCHAR(result)) {
@@ -83,7 +82,6 @@ S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warn
 			    "\"\\c%c\" is more clearly written simply as \"%s\"",
 			    source,
 			    clearer);
-	}
     }
 
     return result;
diff --git a/embed.fnc b/embed.fnc
index a166dc7..0d3ae6a 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -751,7 +751,7 @@ p	|OP*	|localize	|NN OP *o|I32 lex
 ApdR	|I32	|looks_like_number|NN SV *const sv
 Apd	|UV	|grok_bin	|NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
-EMsR	|char	|grok_bslash_c	|const char source|const bool utf8|const bool output_warning
+EMsR	|char	|grok_bslash_c	|const char source|const bool output_warning
 EMsR	|bool	|grok_bslash_o	|NN char** s|NN UV* uv           \
 				|NN const char** error_msg       \
 				|const bool output_warning       \
diff --git a/embed.h b/embed.h
index d172248..269d972 100644
--- a/embed.h
+++ b/embed.h
@@ -980,7 +980,7 @@
 #  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
 #define form_short_octal_warning(a,b)	S_form_short_octal_warning(aTHX_ a,b)
-#define grok_bslash_c(a,b,c)	S_grok_bslash_c(aTHX_ a,b,c)
+#define grok_bslash_c(a,b)	S_grok_bslash_c(aTHX_ a,b)
 #define grok_bslash_o(a,b,c,d,e,f,g)	S_grok_bslash_o(aTHX_ a,b,c,d,e,f,g)
 #define grok_bslash_x(a,b,c,d,e,f,g)	S_grok_bslash_x(aTHX_ a,b,c,d,e,f,g)
 #define regcurly(a,b)		S_regcurly(aTHX_ a,b)
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 37acf2b..d913fa6 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1387,14 +1387,6 @@ uses the character values modulus 256 instead, as if you had provided:
 
    unpack("s", "\x{f3}b")
 
-=item "\c{" is deprecated and is more clearly written as ";"
-
-(D deprecated, syntax) The C<\cI<X>> construct is intended to be a way
-to specify non-printable characters.  You used it with a "{" which
-evaluates to ";", which is printable.  It is planned to remove the
-ability to specify a semi-colon this way in Perl 5.20.  Just use a
-semi-colon or a backslash-semi-colon without the "\c".
-
 =item "\c%c" is more clearly written simply as "%s"
 
 (W syntax) The C<\cI<X>> construct is intended to be a way to specify
@@ -6170,6 +6162,13 @@ you can write it as C<push(@tied_array,())> to avoid this warning.
 (F) The "use" keyword is recognized and executed at compile time, and
 returns no useful value.  See L<perlmod>.
 
+=item Use "%c" instead of "\c{"
+
+(F) The C<\cI<X>> construct is intended to be a way to specify
+non-printable characters.  You used it with a C<"{"> which evaluates to
+C<";">, which is printable.  On ASCII platforms, just use a semi-colon
+or a backslash-semi-colon without the C<"\c">.
+
 =item Use of assignment to $[ is deprecated
 
 (D deprecated) The C<$[> variable (index of the first element in an array)
diff --git a/proto.h b/proto.h
index c4c54d1..3ff37f0 100644
--- a/proto.h
+++ b/proto.h
@@ -7038,7 +7038,7 @@ STATIC char*	S_form_short_octal_warning(pTHX_ const char * const s, const STRLEN
 #define PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING	\
 	assert(s)
 
-STATIC char	S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
+STATIC char	S_grok_bslash_c(pTHX_ const char source, const bool output_warning)
 			__attribute__warn_unused_result__;
 
 STATIC bool	S_grok_bslash_o(pTHX_ char** s, UV* uv, const char** error_msg, const bool output_warning, const bool strict, const bool silence_non_portable, const bool utf8)
diff --git a/regcomp.c b/regcomp.c
index 0951fa5..919035d 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -11785,7 +11785,7 @@ tryagain:
 			}
 		    case 'c':
 			p++;
-			ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
+			ender = grok_bslash_c(*p++, SIZE_ONLY);
 			break;
                     case '8': case '9': /* must be a backreference */
                         --p;
@@ -13528,7 +13528,7 @@ parseit:
 		    goto recode_encoding;
 		break;
 	    case 'c':
-		value = grok_bslash_c(*RExC_parse++, UTF, SIZE_ONLY);
+		value = grok_bslash_c(*RExC_parse++, SIZE_ONLY);
 		break;
 	    case '0': case '1': case '2': case '3': case '4':
 	    case '5': case '6': case '7':
diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke
index 02c6ccc..5a38ba8 100644
--- a/t/lib/warnings/toke
+++ b/t/lib/warnings/toke
@@ -1361,19 +1361,21 @@ no
 ########
 # toke.c
 use warnings;
-my $a = "\c{ack}";
 $a = "\c,";
 $a = "\c`";
 no warnings 'syntax';
-$a = "\c{ack}";
 $a = "\c,";
 $a = "\c`";
-no warnings 'deprecated';
 EXPECT
-"\c{" is deprecated and is more clearly written as ";" at - line 3.
-"\c," is more clearly written simply as "l" at - line 4.
-"\c`" is more clearly written simply as "\ " at - line 5.
-"\c{" is deprecated and is more clearly written as ";" at - line 7.
+"\c," is more clearly written simply as "l" at - line 3.
+"\c`" is more clearly written simply as "\ " at - line 4.
+########
+# toke.c
+use warnings;
+my $a = "\c{ack}";
+EXPECT
+OPTION fatal
+Use ";" instead of "\c{" at - line 3.
 ########
 # toke.c
 use warnings 'syntax' ;
diff --git a/toke.c b/toke.c
index 290af07..766685d 100644
--- a/toke.c
+++ b/toke.c
@@ -3777,7 +3777,7 @@ S_scan_const(pTHX_ char *start)
 	    case 'c':
 		s++;
 		if (s < send) {
-		    *d++ = grok_bslash_c(*s++, has_utf8, 1);
+		    *d++ = grok_bslash_c(*s++, 1);
 		}
 		else {
 		    yyerror("Missing control char name in \\c");
-- 
2.7.4