#endif
ApM |UV |valid_utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
-Adp |UV |utf8n_to_uvuni |NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
+Adp |UV |utf8n_to_uvoffuni|NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
+Ap |UV |utf8n_to_uvuni|NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
#ifdef EBCDIC
Apd |U8* |uvchr_to_utf8 |NN U8 *d|UV uv
Ap |U8* |uvuni_to_utf8 |NN U8 *d|UV uv
Ap |U8* |uvchr_to_utf8_flags |NN U8 *d|UV uv|UV flags
-Apd |U8* |uvuni_to_utf8_flags |NN U8 *d|UV uv|UV flags
+Apd |U8* |uvoffuni_to_utf8_flags |NN U8 *d|UV uv|UV flags
+Ap |U8* |uvuni_to_utf8_flags |NN U8 *d|UV uv|UV flags
Apd |char* |pv_uni_display |NN SV *dsv|NN const U8 *spv|STRLEN len|STRLEN pvlim|UV flags
ApdR |char* |sv_uni_display |NN SV *dsv|NN SV *ssv|STRLEN pvlim|UV flags
: Used by Data::Alias
#define utf8_to_uvchr_buf(a,b,c) Perl_utf8_to_uvchr_buf(aTHX_ a,b,c)
#define utf8_to_uvuni(a,b) Perl_utf8_to_uvuni(aTHX_ a,b)
#define utf8_to_uvuni_buf(a,b,c) Perl_utf8_to_uvuni_buf(aTHX_ a,b,c)
+#define utf8n_to_uvoffuni(a,b,c,d) Perl_utf8n_to_uvoffuni(aTHX_ a,b,c,d)
#define utf8n_to_uvuni(a,b,c,d) Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
#define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX_ a,b,c)
+#define uvoffuni_to_utf8_flags(a,b,c) Perl_uvoffuni_to_utf8_flags(aTHX_ a,b,c)
#define uvuni_to_utf8(a,b) Perl_uvuni_to_utf8(aTHX_ a,b)
#define uvuni_to_utf8_flags(a,b,c) Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
#define valid_utf8_to_uvchr(a,b) Perl_valid_utf8_to_uvchr(aTHX_ a,b)
{
PERL_ARGS_ASSERT_UVUNI_TO_UTF8;
- return Perl_uvuni_to_utf8_flags(aTHX_ d, uv, 0);
+ return Perl_uvoffuni_to_utf8_flags(aTHX_ d, uv, 0);
}
bool
return ch;
}
+/*
+=for apidoc uvuni_to_utf8_flags
+
+Instead you almost certainly want to use L</uvchr_to_utf8> or
+L</uvchr_to_utf8_flags>.
+
+This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>,
+which itself, while not deprecated, should be used only in isolated
+circumstances. These functions were useful for code that wanted to handle
+both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl
+v5.20, the distinctions between the platforms have mostly been made invisible
+to most code, so this function is quite unlikely to be what you want.
+
+=cut
+*/
+
+U8 *
+Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+{
+ PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
+
+ return uvoffuni_to_utf8_flags(d, uv, flags);
+}
+
+/*
+=for apidoc utf8n_to_uvuni
+
+Instead use L</utf8_to_uvchr_buf>, or rarely, L</utf8n_to_uvchr>.
+
+This function was useful for code that wanted to handle both EBCDIC and
+ASCII platforms with Unicode properties, but starting in Perl v5.20, the
+distinctions between the platforms have mostly been made invisible to most
+code, so this function is quite unlikely to be what you want.  If you do need
+this precise functionality, use
+C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>> instead.
+
+=cut
+*/
+
+UV
+Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+{
+ PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
+
+ return utf8n_to_uvoffuni(s, curlen, retlen, flags);
+}
END_EXTERN_C
#define PERL_ARGS_ASSERT_UTF8_TO_UVUNI_BUF \
assert(s); assert(send)
+PERL_CALLCONV UV Perl_utf8n_to_uvoffuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_UTF8N_TO_UVOFFUNI \
+ assert(s)
+
PERL_CALLCONV UV Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_UTF8N_TO_UVUNI \
#define PERL_ARGS_ASSERT_UVCHR_TO_UTF8_FLAGS \
assert(d)
+PERL_CALLCONV U8* Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS \
+ assert(d)
+
PERL_CALLCONV U8* Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_UVUNI_TO_UTF8 \
}
/*
-=for apidoc uvuni_to_utf8_flags
+=for apidoc uvoffuni_to_utf8_flags
THIS FUNCTION SHOULD BE USED IN ONLY VERY SPECIALIZED CIRCUMSTANCES.
bytes available. The return value is the pointer to the byte after the
end of the new character. In other words,
- d = uvuni_to_utf8_flags(d, uv, flags);
+ d = uvoffuni_to_utf8_flags(d, uv, flags);
or, in most cases,
- d = uvuni_to_utf8_flags(d, uv, 0);
+ d = uvoffuni_to_utf8_flags(d, uv, 0);
This is the Unicode-aware way of saying
*/
U8 *
-Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
{
- PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
+ PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS;
/* The first problematic code point is the first surrogate */
if (uv >= UNICODE_SURROGATE_FIRST
/*
-=for apidoc utf8n_to_uvuni
+=for apidoc utf8n_to_uvoffuni
+
+THIS FUNCTION SHOULD BE USED IN ONLY VERY SPECIALIZED CIRCUMSTANCES.
Bottom level UTF-8 decode routine.
-Returns the code point value of the first character in the string C<s>,
+Returns the official Unicode (not native) code point value of the first
+character in the string C<s>,
which is assumed to be in UTF-8 (or UTF-EBCDIC) encoding, and no longer than
C<curlen> bytes; C<*retlen> (if C<retlen> isn't NULL) will be set to
the length, in bytes, of that character.
*/
UV
-Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+Perl_utf8n_to_uvoffuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
{
dVAR;
const U8 * const s0 = s;
const char* const malformed_text = "Malformed UTF-8 character";
- PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
+ PERL_ARGS_ASSERT_UTF8N_TO_UVOFFUNI;
/* The order of malformation tests here is important. We should consume as
* few bytes as possible in order to not skip any valid character. This is
(or the Unicode REPLACEMENT CHARACTER if not), is silently returned, and
C<*retlen> is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is
the next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is
returned.
=cut
the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is returned.
=cut
*/
the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is returned.
=cut
*/
assert(send > s);
/* Call the low level routine asking for checks */
- return Perl_utf8n_to_uvuni(aTHX_ s, send -s, retlen,
+ return Perl_utf8n_to_uvoffuni(aTHX_ s, send -s, retlen,
ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
}
the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is returned.
=cut
*/
{
PERL_ARGS_ASSERT_UVCHR_TO_UTF8;
- return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0);
+ return Perl_uvoffuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0);
}
U8 *
{
PERL_ARGS_ASSERT_UVCHR_TO_UTF8_FLAGS;
- return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), flags);
+ return Perl_uvoffuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), flags);
}
/*
which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
length, in bytes, of that character.
-C<length> and C<flags> are the same as L</utf8n_to_uvuni>().
+C<curlen> and C<flags> are the same as L</utf8n_to_uvoffuni>().
=cut
*/
Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen,
U32 flags)
{
- const UV uv = Perl_utf8n_to_uvuni(aTHX_ s, curlen, retlen, flags);
+ const UV uv = Perl_utf8n_to_uvoffuni(aTHX_ s, curlen, retlen, flags);
PERL_ARGS_ASSERT_UTF8N_TO_UVCHR;
#define NATIVE_TO_UNI(ch) (ch)
/* As there are no translations, avoid the function wrapper */
-#define utf8n_to_uvchr utf8n_to_uvuni
-#define uvchr_to_utf8(a,b) uvuni_to_utf8_flags(a,b,0)
+#define utf8n_to_uvchr utf8n_to_uvoffuni
+#define uvchr_to_utf8(a,b) uvoffuni_to_utf8_flags(a,b,0)
/*