utf8.c: Add valid_utf8_to_uvuni() and valid_utf8_to_uvchr()
author Karl Williamson <public@khwilliamson.com>
Mon, 19 Mar 2012 21:13:19 +0000 (15:13 -0600)
committer Karl Williamson <public@khwilliamson.com>
Tue, 20 Mar 2012 00:23:44 +0000 (18:23 -0600)
These functions are like utf8_to_uvuni() and utf8_to_uvchr(), but their
names imply that the input UTF-8 has been validated.

They are not currently documented, as it's best for XS writers to call
the functions that do validation.

embed.fnc
embed.h
proto.h
utf8.c

index 5a49690..d5e25fa 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1450,6 +1450,8 @@ ApMd      |U8*    |bytes_from_utf8|NN const U8 *s|NN STRLEN *len|NULLOK bool *is_utf8
 ApMd   |U8*    |bytes_to_utf8  |NN const U8 *s|NN STRLEN *len
 Apd    |UV     |utf8_to_uvchr  |NN const U8 *s|NULLOK STRLEN *retlen
 Apd    |UV     |utf8_to_uvuni  |NN const U8 *s|NULLOK STRLEN *retlen
+ApdM   |UV     |valid_utf8_to_uvchr    |NN const U8 *s|NULLOK STRLEN *retlen
+ApdM   |UV     |valid_utf8_to_uvuni    |NN const U8 *s|NULLOK STRLEN *retlen
 Apd    |UV     |utf8_to_uvchr_buf      |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
 Apd    |UV     |utf8_to_uvuni_buf      |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
 pM     |bool   |check_utf8_print       |NN const U8 *s|const STRLEN len
diff --git a/embed.h b/embed.h
index 8a39047..31e024c 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define utf8n_to_uvuni(a,b,c,d)        Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
 #define uvchr_to_utf8_flags(a,b,c)     Perl_uvchr_to_utf8_flags(aTHX_ a,b,c)
 #define uvuni_to_utf8_flags(a,b,c)     Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
+#define valid_utf8_to_uvchr(a,b)       Perl_valid_utf8_to_uvchr(aTHX_ a,b)
+#define valid_utf8_to_uvuni(a,b)       Perl_valid_utf8_to_uvuni(aTHX_ a,b)
 #define vcmp(a,b)              Perl_vcmp(aTHX_ a,b)
 #define vcroak(a,b)            Perl_vcroak(aTHX_ a,b)
 #define vdeb(a,b)              Perl_vdeb(aTHX_ a,b)
diff --git a/proto.h b/proto.h
index 9c91855..5bc2424 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -4605,6 +4605,16 @@ PERL_CALLCONV U8*        Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
 #define PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS   \
        assert(d)
 
+PERL_CALLCONV UV       Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR   \
+       assert(s)
+
+PERL_CALLCONV UV       Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI   \
+       assert(s)
+
 PERL_CALLCONV int      Perl_vcmp(pTHX_ SV *lhv, SV *rhv)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2);
diff --git a/utf8.c b/utf8.c
index 1faa96d..c9bc63a 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -819,6 +819,19 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
                          ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 }
 
+/* Like L</utf8_to_uvchr_buf>(), but to be called only when the UTF-8 string C<s>
+ * is known to be well-formed; the buffer end passed on is C<s> + UTF8_MAXBYTES.
+ * C<retlen> is handed through unchanged.  Some malformations are currently still
+ * checked for, but that checking will likely be removed in the future. */
+
+UV
+Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
+
+    return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
 /*
 =for apidoc utf8_to_uvchr
 
@@ -869,6 +882,19 @@ Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
                               ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 }
 
+/* Like L</utf8_to_uvuni_buf>(), but to be called only when the UTF-8 string C<s>
+ * is known to be well-formed; the buffer end passed on is C<s> + UTF8_MAXBYTES.
+ * C<retlen> is handed through unchanged.  Some malformations are currently still
+ * checked for, but that checking will likely be removed in the future. */
+
+UV
+Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
+
+    return utf8_to_uvuni_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
 /*
 =for apidoc utf8_to_uvuni