is unused at the moment) to be less forgiving about bad UTF-8.
p4raw-id: //depot/perl@7869
Ap |U8* |utf16_to_utf8 |U8* p|U8 *d|I32 bytelen|I32 *newlen
Ap |U8* |utf16_to_utf8_reversed|U8* p|U8 *d|I32 bytelen|I32 *newlen
Ap |STRLEN |utf8_length |U8* s|U8 *e
-Ap |I32 |utf8_distance |U8 *a|U8 *b
+Ap |IV |utf8_distance |U8 *a|U8 *b
Ap |U8* |utf8_hop |U8 *s|I32 off
ApM |U8* |utf8_to_bytes |U8 *s|STRLEN *len
ApM |U8* |bytes_to_utf8 |U8 *s|STRLEN *len
PERL_CALLCONV U8* Perl_utf16_to_utf8(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
PERL_CALLCONV U8* Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
PERL_CALLCONV STRLEN Perl_utf8_length(pTHX_ U8* s, U8 *e);
-PERL_CALLCONV I32 Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
+PERL_CALLCONV IV Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off);
PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len);
=for apidoc|utf8_length|U8 *s|U8 *e
Return the length, in characters, of the UTF-8 encoded string C<s>.
-Stops at string C<e>. If C<e E<lt> s> or if the scan would end up
-past C<e>, return -1.
+Stops at C<e>, which is not itself scanned. If C<e E<lt> s> or if the
+scan would end up past C<e>, croaks.
=cut
*/
STRLEN len = 0;
if (e < s)
- return -1;
+ Perl_croak(aTHX_ "panic: utf8_length: unexpected end");
while (s < e) {
- STRLEN t = UTF8SKIP(s);
+ U8 t = UTF8SKIP(s);
if (e - s < t)
- return -1;
+ Perl_croak(aTHX_ "panic: utf8_length: unaligned end");
s += t;
len++;
}
/* utf8_distance(a,b) returns the number of UTF-8 characters between
   the pointers a and b.  The result is signed: it is negative when a
   precedes b, positive when b precedes a, and 0 when a == b. */
-I32
+IV
Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
{
- I32 off = 0;
+ IV off = 0;
+
/* a precedes b: advance a by UTF8SKIP(a) bytes per step until it
   reaches b, decrementing the count once per step */
if (a < b) {
while (a < b) {
- a += UTF8SKIP(a);
+ U8 c = UTF8SKIP(a);
+
+ if (b - a < c)
+ Perl_croak(aTHX_ "panic: utf8_distance: unaligned end");
+ a += c;
off--;
}
}
else {
/* b precedes a (or a == b, in which case the loop never runs):
   advance b by UTF8SKIP(b) bytes per step, incrementing the count */
while (b < a) {
- b += UTF8SKIP(b);
+ U8 c = UTF8SKIP(b);
+
+ if (a - b < c)
+ Perl_croak(aTHX_ "panic: utf8_distance: unaligned end");
+ b += c;
off++;
}
}
+
return off;
}