Deprecate utf8_to_uvuni_buf()

author Karl Williamson <public@khwilliamson.com>

Tue, 26 Feb 2013 18:02:33 +0000 (11:02 -0700)

committer Karl Williamson <public@khwilliamson.com>

Thu, 29 Aug 2013 15:55:57 +0000 (09:55 -0600)
author Karl Williamson <public@khwilliamson.com>
Tue, 26 Feb 2013 18:02:33 +0000 (11:02 -0700)
committer Karl Williamson <public@khwilliamson.com>
Thu, 29 Aug 2013 15:55:57 +0000 (09:55 -0600)
diff --git a/embed.fnc b/embed.fnc

index a0bf3bf..75fb103 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1544,7 +1544,7 @@ ApdD      |UV     |utf8_to_uvchr  |NN const U8 *s|NULLOK STRLEN *retlen
  ApdD   |UV     |utf8_to_uvuni  |NN const U8 *s|NULLOK STRLEN *retlen
  ApMD   |UV     |valid_utf8_to_uvuni    |NN const U8 *s|NULLOK STRLEN *retlen
  Apd    |UV     |utf8_to_uvchr_buf      |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
-Apd    |UV     |utf8_to_uvuni_buf      |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
+ApdD   |UV     |utf8_to_uvuni_buf      |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
  pM     |bool   |check_utf8_print       |NN const U8 *s|const STRLEN len
  
  #ifdef EBCDIC
diff --git a/proto.h b/proto.h

index 9a170b8..40a4cc0 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -4761,6 +4761,7 @@ PERL_CALLCONV UV  Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
         assert(s)
  
  PERL_CALLCONV UV       Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
+                       __attribute__deprecated__
                         __attribute__nonnull__(pTHX_1)
                         __attribute__nonnull__(pTHX_2);
  #define PERL_ARGS_ASSERT_UTF8_TO_UVUNI_BUF     \
diff --git a/utf8.c b/utf8.c

index 4cc12d6..b1dc30b 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -996,13 +996,14 @@ Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
  /*
  =for apidoc utf8_to_uvuni_buf
  
-Returns the Unicode code point of the first character in the string C<s> which
+Only in very rare circumstances should code need to be dealing in the Unicode
+code point.  Use L</utf8_to_uvchr_buf> instead.
+
+Returns the Unicode (not-native) code point of the first character in the
+string C<s> which
  is assumed to be in UTF-8 encoding; C<send> points to 1 beyond the end of C<s>.
  C<retlen> will be set to the length, in bytes, of that character.
  
-This function should only be used when the returned UV is considered
-an index into the Unicode semantic tables (e.g. swashes).
-
  If C<s> does not point to a well-formed UTF-8 character and UTF8 warnings are
  enabled, zero is returned and C<*retlen> is set (if C<retlen> isn't
  NULL) to -1.  If those warnings are off, the computed value if well-defined (or
@@ -1046,12 +1047,11 @@ Returns the Unicode code point of the first character in the string C<s>
  which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
  length, in bytes, of that character.
  
-This function should only be used when the returned UV is considered
-an index into the Unicode semantic tables (e.g. swashes).
-
  Some, but not all, UTF-8 malformations are detected, and in fact, some
  malformed input could cause reading beyond the end of the input buffer, which
-is why this function is deprecated.  Use L</utf8_to_uvuni_buf> instead.
+is one reason why this function is deprecated.  The other is that only in
+extremely limited circumstances should the Unicode versus native code point be
+of any interest to you.  Use L</utf8_to_uvchr_buf> instead.
  
  If C<s> points to one of the detected malformations, and UTF8 warnings are
  enabled, zero is returned and C<*retlen> is set (if C<retlen> doesn't point to
author	Karl Williamson <public@khwilliamson.com>
	Tue, 26 Feb 2013 18:02:33 +0000 (11:02 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Thu, 29 Aug 2013 15:55:57 +0000 (09:55 -0600)
embed.fnc		patch \| blob \| history
proto.h		patch \| blob \| history
utf8.c		patch \| blob \| history