From 6ef2ab89d2567e144b289574a2e087dd7eec7894 Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Mon, 12 Jul 2010 13:09:28 +0100 Subject: [PATCH] Perl_sv_len_utf8 can use the UTF-8 offset cache to reduce its linear scan. Previously, if the scalar's character length wasn't yet known, but an offset midway was, the offset would be ignored, and the linear scan of UTF-8 was for the entire length of the scalar. --- sv.c | 14 ++++++++++++-- t/op/length.t | 9 ++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/sv.c b/sv.c index 3a25abc..6cfb488 100644 --- a/sv.c +++ b/sv.c @@ -6047,8 +6047,18 @@ Perl_sv_len_utf8(pTHX_ register SV *const sv) STRLEN ulen; MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL; - if (mg && mg->mg_len != -1) { - ulen = mg->mg_len; + if (mg && (mg->mg_len != -1 || mg->mg_ptr)) { + if (mg->mg_len != -1) + ulen = mg->mg_len; + else { + /* We can use the offset cache for a headstart. + The longer value is stored in the first pair. */ + STRLEN *cache = (STRLEN *) mg->mg_ptr; + + ulen = cache[0] + Perl_utf8_length(aTHX_ s + cache[1], + s + len); + } + if (PL_utf8cache < 0) { const STRLEN real = Perl_utf8_length(aTHX_ s, s + len); if (real != ulen) { diff --git a/t/op/length.t b/t/op/length.t index eb35720..c73d4c5 100644 --- a/t/op/length.t +++ b/t/op/length.t @@ -6,7 +6,7 @@ BEGIN { @INC = '../lib'; } -plan (tests => 28); +plan (tests => 30); print "not " unless length("") == 0; print "ok 1\n"; @@ -196,3 +196,10 @@ is(length($uo), undef, "Length of overloaded reference"); # ok(!defined $uo); Turns you can't test this. FIXME for pp_defined? is($warnings, 0, "There were no warnings"); + +{ + my $y = "\x{100}BC"; + is(index($y, "B"), 1, 'adds an intermediate position to the offset cache'); + is(length $y, 3, + 'Check that sv_len_utf8() can take advantage of the offset cache'); +} -- 2.7.4