From 3986bb7cb3afd3afe090d21354792863280c5377 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 28 Apr 2012 18:48:52 -0600
Subject: [PATCH] utf8.c: Remove unnecessary validation

These two functions are to be called only on strings known to be valid,
so we can skip the validation.
---
 utf8.c | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)
diff --git a/utf8.c b/utf8.c
index ce0820d..24900b9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -915,16 +915,17 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
 }
 
 /* Like L</utf8_to_uvchr_buf>(), but should only be called when it is known that
- * there are no malformations in the input UTF-8 string C<s>.  Currently, some
- * malformations are checked for, but this checking likely will be removed in
- * the future */
+ * there are no malformations in the input UTF-8 string C<s>.  surrogates,
+ * non-character code points, and non-Unicode code points are allowed */
 
 UV
 Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
 {
+    const UV uv = valid_utf8_to_uvuni(s, retlen);
+
     PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
 
-    return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen);
+    return UNI_TO_NATIVE(uv);
 }
 
 /*
@@ -993,16 +994,38 @@ Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
 }
 
 /* Like L</utf8_to_uvuni_buf>(), but should only be called when it is known that
- * there are no malformations in the input UTF-8 string C<s>.  Currently, some
- * malformations are checked for, but this checking likely will be removed in
- * the future */
+ * there are no malformations in the input UTF-8 string C<s>.  surrogates,
+ * non-character code points, and non-Unicode code points are allowed */
 
 UV
 Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
 {
+    UV expectlen = UTF8SKIP(s);
+    const U8* send = s + expectlen;
+    UV uv = NATIVE_TO_UTF(*s);
+
     PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
 
-    return utf8_to_uvuni_buf(s, s + UTF8_MAXBYTES, retlen);
+    if (retlen) {
+	*retlen = expectlen;
+    }
+
+    /* An invariant is trivially returned */
+    if (expectlen == 1) {
+	return uv;
+    }
+
+    /* Remove the leading bits that indicate the number of bytes, leaving just
+     * the bits that are part of the value */
+    uv &= UTF_START_MASK(expectlen);
+
+    /* Now, loop through the remaining bytes, accumulating each into the
+     * working total as we go */
+    for (++s; s < send; s++) {
+	uv = UTF8_ACCUMULATE(uv, *s);
+    }
+
+    return uv;
 }
 
 /*
-- 
2.7.4