ApMd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *len
Apd |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
Apd |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
+Apd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
+Apd |UV |utf8_to_uvuni_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
pM |bool |check_utf8_print |NN const U8 *s|const STRLEN len
#ifdef EBCDIC
#define utf8_length(a,b) Perl_utf8_length(aTHX_ a,b)
#define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b)
#define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
+#define utf8_to_uvchr_buf(a,b,c) Perl_utf8_to_uvchr_buf(aTHX_ a,b,c)
#define utf8_to_uvuni(a,b) Perl_utf8_to_uvuni(aTHX_ a,b)
+#define utf8_to_uvuni_buf(a,b,c) Perl_utf8_to_uvuni_buf(aTHX_ a,b,c)
#define utf8n_to_uvuni(a,b,c,d) Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
#define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX_ a,b,c)
#define uvuni_to_utf8_flags(a,b,c) Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
=item *
-XXX
+Two new functions C<utf8_to_uvchr_buf()> and C<utf8_to_uvuni_buf()> have
+been added. These are the same as C<utf8_to_uvchr> and
+C<utf8_to_uvuni>, but take an extra parameter that is used to guard
+against reading beyond the end of the input string.
+See L<perlapi/utf8_to_uvchr_buf> and L<perlapi/utf8_to_uvuni_buf>.
=back
#define PERL_ARGS_ASSERT_UTF8_TO_UVCHR \
assert(s)
+PERL_CALLCONV UV Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF \
+ assert(s); assert(send)
+
PERL_CALLCONV UV Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_UTF8_TO_UVUNI \
assert(s)
+PERL_CALLCONV UV Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_UTF8_TO_UVUNI_BUF \
+ assert(s); assert(send)
+
PERL_CALLCONV UV Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_UTF8N_TO_UVUNI \
use and those yet to be assigned, are never considered malformed and never
warn.
-Most code should use L</utf8_to_uvchr>() rather than call this directly.
+Most code should use L</utf8_to_uvchr_buf>() rather than call this directly.
=cut
*/
}
/*
+=for apidoc utf8_to_uvchr_buf
+
+Returns the native code point of the first character in the string C<s> which
+is assumed to be in UTF-8 encoding; C<send> points to 1 beyond the end of C<s>.
+C<retlen> will be set to the length, in bytes, of that character.
+
+If C<s> does not point to a well-formed UTF-8 character, zero is
+returned and C<retlen> is set, if possible, to -1.
+
+=cut
+*/
+
+
+UV
+Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
+{
+ PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF;
+
+ assert(s < send);
+
+ return utf8n_to_uvchr(s, send - s, retlen,
+ ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+}
+
+/*
=for apidoc utf8_to_uvchr
Returns the native code point of the first character in the string C<s>
}
/*
+=for apidoc utf8_to_uvuni_buf
+
+Returns the Unicode code point of the first character in the string C<s> which
+is assumed to be in UTF-8 encoding; C<send> points to 1 beyond the end of C<s>.
+C<retlen> will be set to the length, in bytes, of that character.
+
+This function should only be used when the returned UV is considered
+an index into the Unicode semantic tables (e.g. swashes).
+
+If C<s> does not point to a well-formed UTF-8 character, zero is
+returned and C<retlen> is set, if possible, to -1.
+
+=cut
+*/
+
+UV
+Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
+{
+ PERL_ARGS_ASSERT_UTF8_TO_UVUNI_BUF;
+
+ assert(send > s);
+
+ /* Call the low level routine asking for checks */
+ return Perl_utf8n_to_uvuni(aTHX_ s, send -s, retlen,
+ ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+}
+
+/*
=for apidoc utf8_to_uvuni
Returns the Unicode code point of the first character in the string C<s>