From 3d81eea60b810797ecb1123661164802c8a1984b Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Wed, 20 Feb 2013 10:39:48 -0700
Subject: [PATCH] Deprecate NATIVE_TO_NEED and ASCII_TO_NEED

These macros are no longer called in the Perl core.  This commit turns
them into functions so that they can use gcc's deprecation facility.

I believe these were defective right from the beginning, and I have
struggled to understand what's going on.  From the name, it appears
NATIVE_TO_NEED takes a native byte and turns it into UTF-8 if the
appropriate parameter indicates that.  But that is impossible to do
correctly from that API, as for variant characters, it needs to return
two bytes.  It could only work correctly if ch is an I8 byte, which
isn't native, and hence the name would be wrong.

Similar arguments apply to ASCII_TO_NEED.

The function S_append_utf8_from_native_byte(const U8 byte, U8** dest)
does what I think NATIVE_TO_NEED intended.
---
 embed.fnc   |  2 ++
 mathoms.c   | 15 +++++++++++++++
 proto.h     | 10 ++++++++++
 toke.c      | 10 ----------
 utf8.h      |  3 ---
 utfebcdic.h |  8 --------
 6 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 7747453..0331056 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -899,6 +899,8 @@ pX	|I32	|my_stat_flags	|NULLOK const U32 flags
 Ap	|char *	|my_strftime	|NN const char *fmt|int sec|int min|int hour|int mday|int mon|int year|int wday|int yday|int isdst
 : Used in pp_ctl.c
 p	|void	|my_unexec
+ADMnoPR	|UV	|NATIVE_TO_NEED	|const UV enc|const UV ch
+ADMnoPR	|UV	|ASCII_TO_NEED	|const UV enc|const UV ch
 Apa	|OP*	|newANONLIST	|NULLOK OP* o
 Apa	|OP*	|newANONHASH	|NULLOK OP* o
 Ap	|OP*	|newANONSUB	|I32 floor|NULLOK OP* proto|NULLOK OP* block
diff --git a/mathoms.c b/mathoms.c
index 9a7e3ff..030f5b0 100644
--- a/mathoms.c
+++ b/mathoms.c
@@ -1213,6 +1213,21 @@ Perl_sv_mortalcopy(pTHX_ SV *const oldstr)
     return Perl_sv_mortalcopy_flags(aTHX_ oldstr, SV_GMAGIC);
 }
 
+UV      /* Was a macro; now a function so it can be deprecated.  Ignores enc, returns ch as-is */
+NATIVE_TO_NEED(const UV enc, const UV ch)
+{
+    PERL_UNUSED_ARG(enc);
+    return ch;
+}
+
+UV      /* Was a macro; now a function so it can be deprecated.  Ignores enc, returns ch as-is */
+ASCII_TO_NEED(const UV enc, const UV ch)
+{
+    PERL_UNUSED_ARG(enc);
+    return ch;
+}
+
+
 END_EXTERN_C
 
 #endif /* NO_MATHOMS */
diff --git a/proto.h b/proto.h
index 0da17ec..51e4009 100644
--- a/proto.h
+++ b/proto.h
@@ -17,11 +17,21 @@
  */
 
 START_EXTERN_C
+PERL_CALLCONV UV	ASCII_TO_NEED(const UV enc, const UV ch)
+			__attribute__deprecated__
+			__attribute__warn_unused_result__
+			__attribute__pure__;
+
 PERL_CALLCONV int	Perl_Gv_AMupdate(pTHX_ HV* stash, bool destructing)
 			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_GV_AMUPDATE	\
 	assert(stash)
 
+PERL_CALLCONV UV	NATIVE_TO_NEED(const UV enc, const UV ch)
+			__attribute__deprecated__
+			__attribute__warn_unused_result__
+			__attribute__pure__;
+
 PERL_CALLCONV const char *	Perl_PerlIO_context_layers(pTHX_ const char *mode);
 PERL_CALLCONV void*	Perl_Slab_Alloc(pTHX_ size_t sz)
 			__attribute__malloc__
diff --git a/toke.c b/toke.c
index bab4fbb..7b397e3 100644
--- a/toke.c
+++ b/toke.c
@@ -3475,16 +3475,6 @@ S_scan_const(pTHX_ char *start)
 		 * now, while preserving the fact that it was a named character
 		 * so that the regex compiler knows this */
 
-		/* This section of code doesn't generally use the
-		 * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
-		 * a close examination of this macro and determined it is a
-		 * no-op except on utfebcdic variant characters.  Every
-		 * character generated by this that would normally need to be
-		 * enclosed by this macro is invariant, so the macro is not
-		 * needed, and would complicate use of copy().  XXX There are
-		 * other parts of this file where the macro is used
-		 * inconsistently, but are saved by it being a no-op */
-
 		/* The structure of this section of code (besides checking for
 		 * errors and upgrading to utf8) is:
 		 *  Further disambiguate between the two meanings of \N, and if
diff --git a/utf8.h b/utf8.h
index b76f098..f1205a6 100644
--- a/utf8.h
+++ b/utf8.h
@@ -122,9 +122,6 @@ END_EXTERN_C
 /* Transforms in wide UV chars */
 #define UNI_TO_NATIVE(ch)        (ch)
 #define NATIVE_TO_UNI(ch)        (ch)
-/* Transforms in invariant space */
-#define NATIVE_TO_NEED(enc,ch)   (ch)
-#define ASCII_TO_NEED(enc,ch)    (ch)
 
 /* As there are no translations, avoid the function wrapper */
 #define utf8n_to_uvchr utf8n_to_uvuni
diff --git a/utfebcdic.h b/utfebcdic.h
index ec342b5..766c977 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -531,14 +531,6 @@ END_EXTERN_C
 #define NATIVE_TO_UNI(ch)        (((ch) > 255) ? (ch) : NATIVE_TO_LATIN1(ch))
 #define UNI_TO_NATIVE(ch)        (((ch) > 255) ? (ch) : LATIN1_TO_NATIVE(ch))
 
-/* Transform in invariant..byte space */
-#define NATIVE_TO_NEED(enc,ch)   ((enc)                                     \
-                                  ? I8_TO_NATIVE_UTF8(NATIVE_TO_LATIN1(ch)) \
-                                  : (ch))
-#define ASCII_TO_NEED(enc,ch)    ((enc)                   \
-                                  ? I8_TO_NATIVE_UTF8(ch) \
-                                  : LATIN1_TO_NATIVE(ch))
-
 /*
   The following table is adapted from tr16, it shows I8 encoding of Unicode code points.
 
-- 
2.7.4