From c9d50bb49b0231aa883d3d0293ef039845f50237 Mon Sep 17 00:00:00 2001
From: Father Chrysostomos
Date: Wed, 26 Sep 2012 20:39:55 -0700
Subject: [PATCH] Make sv_len_utf8 return a character count as documented

Brought up in ticket #114690.

sv_len_utf8 does not make sense. It assumes that the string is UTF-8.
If it is not, it just does the wrong thing. For magical variables, it
expects mg_length to return the number of characters, but it would
only sometimes do that until the previous commit, which restored it to
returning bytes for all scalars.

Since you have to know already that a string is in utf8 before you can
call sv_len_utf8, but sv_len_utf8 might call get-magic which will
change the utf8-ness, it really makes no sense as an API. Up till now,
it has been consistently buggy with any magic scalars.

So I have changed sv_len_utf8 to do exactly what the documentation
says: return the number of characters.

This also causes an existing buggy code path in sv_len_utf8_nomg to be
reached (SvCUR without checking SvPOK), so this fixes that too.
---
 sv.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/sv.c b/sv.c
index 342fc7f..ee0f755 100644
--- a/sv.c
+++ b/sv.c
@@ -6514,13 +6514,8 @@ Perl_sv_len_utf8(pTHX_ register SV *const sv)
     if (!sv)
         return 0;
 
-    if (SvGMAGICAL(sv))
-        return mg_length(sv);
-    else
-    {
-        SvGETMAGIC(sv);
-        return sv_len_utf8_nomg(sv);
-    }
+    SvGETMAGIC(sv);
+    return sv_len_utf8_nomg(sv);
 }
 
 STRLEN
@@ -6532,7 +6527,7 @@ Perl_sv_len_utf8_nomg(pTHX_ SV * const sv)
 
     PERL_ARGS_ASSERT_SV_LEN_UTF8_NOMG;
 
-    if (PL_utf8cache) {
+    if (PL_utf8cache && SvUTF8(sv)) {
         STRLEN ulen;
         MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
 
@@ -6559,7 +6554,7 @@ Perl_sv_len_utf8_nomg(pTHX_ SV * const sv)
         }
         return ulen;
     }
-    return Perl_utf8_length(aTHX_ s, s + len);
+    return SvUTF8(sv) ? Perl_utf8_length(aTHX_ s, s + len) : len;
 }
 
 /* Walk forwards to find the byte corresponding to the passed in UTF-8
-- 
2.7.4