utf8.c: Stop using two functions

author Karl Williamson <public@khwilliamson.com>

Tue, 26 Feb 2013 19:08:50 +0000 (12:08 -0700)

committer Karl Williamson <public@khwilliamson.com>

Thu, 29 Aug 2013 15:55:58 +0000 (09:55 -0600)
author Karl Williamson <public@khwilliamson.com>
Tue, 26 Feb 2013 19:08:50 +0000 (12:08 -0700)
committer Karl Williamson <public@khwilliamson.com>
Thu, 29 Aug 2013 15:55:58 +0000 (09:55 -0600)
diff --git a/embed.fnc b/embed.fnc

index 23c4311..5372149 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1554,7 +1554,8 @@ Adpbm     |UV     |utf8n_to_uvchr |NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U3
  #endif
  ApM    |UV     |valid_utf8_to_uvchr    |NN const U8 *s|NULLOK STRLEN *retlen
  
-Adp    |UV     |utf8n_to_uvuni |NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
+Adp    |UV     |utf8n_to_uvoffuni|NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
+Ap     |UV     |utf8n_to_uvuni|NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
  
  #ifdef EBCDIC
  Apd    |U8*    |uvchr_to_utf8  |NN U8 *d|UV uv
@@ -1564,7 +1565,8 @@ Apdbm     |U8*    |uvchr_to_utf8  |NN U8 *d|UV uv
  
  Ap     |U8*    |uvuni_to_utf8  |NN U8 *d|UV uv
  Ap     |U8*    |uvchr_to_utf8_flags    |NN U8 *d|UV uv|UV flags
-Apd    |U8*    |uvuni_to_utf8_flags    |NN U8 *d|UV uv|UV flags
+Apd    |U8*    |uvoffuni_to_utf8_flags |NN U8 *d|UV uv|UV flags
+Ap     |U8*    |uvuni_to_utf8_flags    |NN U8 *d|UV uv|UV flags
  Apd    |char*  |pv_uni_display |NN SV *dsv|NN const U8 *spv|STRLEN len|STRLEN pvlim|UV flags
  ApdR   |char*  |sv_uni_display |NN SV *dsv|NN SV *ssv|STRLEN pvlim|UV flags
  : Used by Data::Alias
diff --git a/embed.h b/embed.h

index b4a56d6..110f735 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -696,8 +696,10 @@
  #define utf8_to_uvchr_buf(a,b,c)       Perl_utf8_to_uvchr_buf(aTHX_ a,b,c)
  #define utf8_to_uvuni(a,b)     Perl_utf8_to_uvuni(aTHX_ a,b)
  #define utf8_to_uvuni_buf(a,b,c)       Perl_utf8_to_uvuni_buf(aTHX_ a,b,c)
+#define utf8n_to_uvoffuni(a,b,c,d)     Perl_utf8n_to_uvoffuni(aTHX_ a,b,c,d)
  #define utf8n_to_uvuni(a,b,c,d)        Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
  #define uvchr_to_utf8_flags(a,b,c)     Perl_uvchr_to_utf8_flags(aTHX_ a,b,c)
+#define uvoffuni_to_utf8_flags(a,b,c)  Perl_uvoffuni_to_utf8_flags(aTHX_ a,b,c)
  #define uvuni_to_utf8(a,b)     Perl_uvuni_to_utf8(aTHX_ a,b)
  #define uvuni_to_utf8_flags(a,b,c)     Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
  #define valid_utf8_to_uvchr(a,b)       Perl_valid_utf8_to_uvchr(aTHX_ a,b)
diff --git a/mathoms.c b/mathoms.c

index 030f5b0..183545a 100644 (file)
--- a/mathoms.c
+++ b/mathoms.c
@@ -709,7 +709,7 @@ Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
  {
      PERL_ARGS_ASSERT_UVUNI_TO_UTF8;
  
-    return Perl_uvuni_to_utf8_flags(aTHX_ d, uv, 0);
+    return Perl_uvoffuni_to_utf8_flags(aTHX_ d, uv, 0);
  }
  
  bool
@@ -1227,6 +1227,51 @@ ASCII_TO_NEED(const UV enc, const UV ch)
      return ch;
  }
  
+/*
+=for apidoc uvuni_to_utf8_flags
+
+Instead you almost certainly want to use L</uvchr_to_utf8> or
+L</uvchr_to_utf8_flags>.
+
+This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>,
+which itself, while not deprecated, should be used only in isolated
+circumstances.  These functions were useful for code that wanted to handle
+both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl
+v5.20, the distinctions between the platforms have mostly been made invisible
+to most code, so this function is quite unlikely to be what you want.
+
+=cut
+*/
+
+U8 *
+Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+{
+    PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
+
+    return uvoffuni_to_utf8_flags(d, uv, flags);
+}
+
+/*
+=for apidoc utf8n_to_uvuni
+
+Instead use L</utf8_to_uvchr_buf>, or rarely, L</utf8n_to_uvchr>.
+
+This function was useful for code that wanted to handle both EBCDIC and
+ASCII platforms with Unicode properties, but starting in Perl v5.20, the
+distinctions between the platforms have mostly been made invisible to most
+code, so this function is quite unlikely to be what you want.  If you do need
+it, use C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>> instead.
+
+=cut
+*/
+
+UV
+Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+{
+    PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
+
+    return utf8n_to_uvoffuni(s, curlen, retlen, flags);
+}
  
  END_EXTERN_C
  
diff --git a/proto.h b/proto.h

index 5356ddb..4c95821 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -4767,6 +4767,11 @@ PERL_CALLCONV UV Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLE
  #define PERL_ARGS_ASSERT_UTF8_TO_UVUNI_BUF     \
         assert(s); assert(send)
  
+PERL_CALLCONV UV       Perl_utf8n_to_uvoffuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_UTF8N_TO_UVOFFUNI     \
+       assert(s)
+
  PERL_CALLCONV UV       Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
                         __attribute__nonnull__(pTHX_1);
  #define PERL_ARGS_ASSERT_UTF8N_TO_UVUNI        \
@@ -4777,6 +4782,11 @@ PERL_CALLCONV U8*        Perl_uvchr_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
  #define PERL_ARGS_ASSERT_UVCHR_TO_UTF8_FLAGS   \
         assert(d)
  
+PERL_CALLCONV U8*      Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS        \
+       assert(d)
+
  PERL_CALLCONV U8*      Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
                         __attribute__nonnull__(pTHX_1);
  #define PERL_ARGS_ASSERT_UVUNI_TO_UTF8 \
diff --git a/utf8.c b/utf8.c

index 22f5331..945e31c 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -87,7 +87,7 @@ Perl_is_ascii_string(const U8 *s, STRLEN len)
  }
  
  /*
-=for apidoc uvuni_to_utf8_flags
+=for apidoc uvoffuni_to_utf8_flags
  
  THIS FUNCTION SHOULD BE USED IN ONLY VERY SPECIALIZED CIRCUMSTANCES.
  
@@ -96,11 +96,11 @@ of the string C<d>; C<d> should have at least C<UTF8_MAXBYTES+1> free
  bytes available. The return value is the pointer to the byte after the
  end of the new character. In other words,
  
-    d = uvuni_to_utf8_flags(d, uv, flags);
+    d = uvoffuni_to_utf8_flags(d, uv, flags);
  
  or, in most cases,
  
-    d = uvuni_to_utf8_flags(d, uv, 0);
+    d = uvoffuni_to_utf8_flags(d, uv, 0);
  
  This is the Unicode-aware way of saying
  
@@ -137,9 +137,9 @@ DISALLOW flags.
  */
  
  U8 *
-Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
  {
-    PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
+    PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS;
  
      /* The first problematic code point is the first surrogate */
      if (uv >= UNICODE_SURROGATE_FIRST
@@ -475,10 +475,13 @@ Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
  
  /*
  
-=for apidoc utf8n_to_uvuni
+=for apidoc utf8n_to_uvoffuni
+
+THIS FUNCTION SHOULD BE USED IN ONLY VERY SPECIALIZED CIRCUMSTANCES.
  
  Bottom level UTF-8 decode routine.
-Returns the code point value of the first character in the string C<s>,
+Returns the official Unicode (not native) code point value of the first
+character in the string C<s>,
  which is assumed to be in UTF-8 (or UTF-EBCDIC) encoding, and no longer than
  C<curlen> bytes; C<*retlen> (if C<retlen> isn't NULL) will be set to
  the length, in bytes, of that character.
@@ -553,7 +556,7 @@ Most code should use L</utf8_to_uvchr_buf>() rather than call this directly.
  */
  
  UV
-Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+Perl_utf8n_to_uvoffuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
  {
      dVAR;
      const U8 * const s0 = s;
@@ -571,7 +574,7 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
  
      const char* const malformed_text = "Malformed UTF-8 character";
  
-    PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
+    PERL_ARGS_ASSERT_UTF8N_TO_UVOFFUNI;
  
      /* The order of malformation tests here is important.  We should consume as
       * few bytes as possible in order to not skip any valid character.  This is
@@ -905,7 +908,7 @@ NULL) to -1.  If those warnings are off, the computed value, if well-defined
  (or the Unicode REPLACEMENT CHARACTER if not), is silently returned, and
  C<*retlen> is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is
  the next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is
  returned.
  
  =cut
@@ -978,7 +981,7 @@ NULL) to -1.  If those warnings are off, the computed value if well-defined (or
  the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
  is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
  next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is returned.
  
  =cut
  */
@@ -1008,7 +1011,7 @@ NULL) to -1.  If those warnings are off, the computed value if well-defined (or
  the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
  is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
  next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is returned.
  
  =cut
  */
@@ -1021,7 +1024,7 @@ Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
      assert(send > s);
  
      /* Call the low level routine asking for checks */
-    return Perl_utf8n_to_uvuni(aTHX_ s, send -s, retlen,
+    return Perl_utf8n_to_uvoffuni(aTHX_ s, send -s, retlen,
                                ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
  }
  
@@ -1057,7 +1060,7 @@ NULL) to -1.  If those warnings are off, the computed value if well-defined (or
  the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
  is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
  next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is returned.
  
  =cut
  */
@@ -4191,7 +4194,7 @@ Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv)
  {
      PERL_ARGS_ASSERT_UVCHR_TO_UTF8;
  
-    return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0);
+    return Perl_uvoffuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0);
  }
  
  U8 *
@@ -4199,7 +4202,7 @@ Perl_uvchr_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
  {
      PERL_ARGS_ASSERT_UVCHR_TO_UTF8_FLAGS;
  
-    return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), flags);
+    return Perl_uvoffuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), flags);
  }
  
  /*
@@ -4210,7 +4213,7 @@ C<s>
  which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
  length, in bytes, of that character.
  
-C<length> and C<flags> are the same as L</utf8n_to_uvuni>().
+C<length> and C<flags> are the same as L</utf8n_to_uvoffuni>().
  
  =cut
  */
@@ -4221,7 +4224,7 @@ UV
  Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen,
  U32 flags)
  {
-    const UV uv = Perl_utf8n_to_uvuni(aTHX_ s, curlen, retlen, flags);
+    const UV uv = Perl_utf8n_to_uvoffuni(aTHX_ s, curlen, retlen, flags);
  
      PERL_ARGS_ASSERT_UTF8N_TO_UVCHR;
  
diff --git a/utf8.h b/utf8.h

index 251e32d..b3bf997 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -123,8 +123,8 @@ END_EXTERN_C
  #define NATIVE_TO_UNI(ch)        (ch)
  
  /* As there are no translations, avoid the function wrapper */
-#define utf8n_to_uvchr utf8n_to_uvuni
-#define uvchr_to_utf8(a,b) uvuni_to_utf8_flags(a,b,0)
+#define utf8n_to_uvchr utf8n_to_uvoffuni
+#define uvchr_to_utf8(a,b) uvoffuni_to_utf8_flags(a,b,0)
  
  /*
author	Karl Williamson <public@khwilliamson.com>
	Tue, 26 Feb 2013 19:08:50 +0000 (12:08 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Thu, 29 Aug 2013 15:55:58 +0000 (09:55 -0600)
embed.fnc		patch \| blob \| history
embed.h		patch \| blob \| history
mathoms.c		patch \| blob \| history
proto.h		patch \| blob \| history
utf8.c		patch \| blob \| history
utf8.h		patch \| blob \| history