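Previously, if the scalar's character length wasn't yet known but the offset
cache already held a position partway through the string, that cached offset
was ignored and the linear scan of the UTF-8 covered the entire length of the
scalar. Now the scan starts from the cached offset instead.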
STRLEN ulen;
MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
- if (mg && mg->mg_len != -1) {
- ulen = mg->mg_len;
+ if (mg && (mg->mg_len != -1 || mg->mg_ptr)) {
+ if (mg->mg_len != -1)
+ ulen = mg->mg_len;
+ else {
+ /* We can use the offset cache for a head start.
+ The pair with the larger offset is stored first: cache[0] is its
+ character offset and cache[1] is the matching byte offset. */
+ STRLEN *cache = (STRLEN *) mg->mg_ptr;
+
+ ulen = cache[0] + Perl_utf8_length(aTHX_ s + cache[1],
+ s + len);
+ }
+
if (PL_utf8cache < 0) {
const STRLEN real = Perl_utf8_length(aTHX_ s, s + len);
if (real != ulen) {
@INC = '../lib';
}
-plan (tests => 28);
+plan (tests => 30);
print "not " unless length("") == 0;
print "ok 1\n";
# ok(!defined $uo); Turns out you can't test this. FIXME for pp_defined?
is($warnings, 0, "There were no warnings");
+
+{
+ my $y = "\x{100}BC";
+ is(index($y, "B"), 1, 'adds an intermediate position to the offset cache');
+ is(length $y, 3,
+ 'Check that sv_len_utf8() can take advantage of the offset cache');
+}