utf8.c: Add valid_utf8_to_uvuni() and valid_utf8_to_uvchr()

author Karl Williamson <public@khwilliamson.com>

Mon, 19 Mar 2012 21:13:19 +0000 (15:13 -0600)

committer Karl Williamson <public@khwilliamson.com>

Tue, 20 Mar 2012 00:23:44 +0000 (18:23 -0600)
author Karl Williamson <public@khwilliamson.com>
Mon, 19 Mar 2012 21:13:19 +0000 (15:13 -0600)
committer Karl Williamson <public@khwilliamson.com>
Tue, 20 Mar 2012 00:23:44 +0000 (18:23 -0600)
diff --git a/embed.fnc b/embed.fnc

index 5a49690..d5e25fa 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1450,6 +1450,8 @@ ApMd      |U8*    |bytes_from_utf8|NN const U8 *s|NN STRLEN *len|NULLOK bool *is_utf8
  ApMd   |U8*    |bytes_to_utf8  |NN const U8 *s|NN STRLEN *len
  Apd    |UV     |utf8_to_uvchr  |NN const U8 *s|NULLOK STRLEN *retlen
  Apd    |UV     |utf8_to_uvuni  |NN const U8 *s|NULLOK STRLEN *retlen
+ApdM   |UV     |valid_utf8_to_uvchr    |NN const U8 *s|NULLOK STRLEN *retlen
+ApdM   |UV     |valid_utf8_to_uvuni    |NN const U8 *s|NULLOK STRLEN *retlen
  Apd    |UV     |utf8_to_uvchr_buf      |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
  Apd    |UV     |utf8_to_uvuni_buf      |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
  pM     |bool   |check_utf8_print       |NN const U8 *s|const STRLEN len
diff --git a/embed.h b/embed.h

index 8a39047..31e024c 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -678,6 +678,8 @@
  #define utf8n_to_uvuni(a,b,c,d)        Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
  #define uvchr_to_utf8_flags(a,b,c)     Perl_uvchr_to_utf8_flags(aTHX_ a,b,c)
  #define uvuni_to_utf8_flags(a,b,c)     Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
+#define valid_utf8_to_uvchr(a,b)       Perl_valid_utf8_to_uvchr(aTHX_ a,b)
+#define valid_utf8_to_uvuni(a,b)       Perl_valid_utf8_to_uvuni(aTHX_ a,b)
  #define vcmp(a,b)              Perl_vcmp(aTHX_ a,b)
  #define vcroak(a,b)            Perl_vcroak(aTHX_ a,b)
  #define vdeb(a,b)              Perl_vdeb(aTHX_ a,b)
diff --git a/proto.h b/proto.h

index 9c91855..5bc2424 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -4605,6 +4605,16 @@ PERL_CALLCONV U8*        Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
  #define PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS   \
         assert(d)
  
+PERL_CALLCONV UV       Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR   \
+       assert(s)
+
+PERL_CALLCONV UV       Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI   \
+       assert(s)
+
  PERL_CALLCONV int      Perl_vcmp(pTHX_ SV *lhv, SV *rhv)
                         __attribute__nonnull__(pTHX_1)
                         __attribute__nonnull__(pTHX_2);
diff --git a/utf8.c b/utf8.c

index 1faa96d..c9bc63a 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -819,6 +819,19 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
                           ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
  }
  
+/* Like L</utf8_to_uvchr_buf>(), but takes no end-of-buffer pointer, so it must
+ * only be called when the UTF-8 string C<s> is known to be well-formed.  C<s>
+ * must be non-NULL; C<retlen> may be NULL.  Some malformations are currently
+ * still checked for, but that checking will likely be removed in the future */
+
+UV
+Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
+    /* No real end pointer is known; s + UTF8_MAXBYTES stands in for 'send' */
+    return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
  /*
  =for apidoc utf8_to_uvchr
  
@@ -869,6 +882,19 @@ Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
                                ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
  }
  
+/* Like L</utf8_to_uvuni_buf>(), but takes no end-of-buffer pointer, so it must
+ * only be called when the UTF-8 string C<s> is known to be well-formed.  C<s>
+ * must be non-NULL; C<retlen> may be NULL.  Some malformations are currently
+ * still checked for, but that checking will likely be removed in the future */
+
+UV
+Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
+    /* No real end pointer is known; s + UTF8_MAXBYTES stands in for 'send' */
+    return utf8_to_uvuni_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
  /*
  =for apidoc utf8_to_uvuni
author	Karl Williamson <public@khwilliamson.com>
	Mon, 19 Mar 2012 21:13:19 +0000 (15:13 -0600)
committer	Karl Williamson <public@khwilliamson.com>
	Tue, 20 Mar 2012 00:23:44 +0000 (18:23 -0600)
embed.fnc		patch \| blob \| history
embed.h		patch \| blob \| history
proto.h		patch \| blob \| history
utf8.c		patch \| blob \| history