From c9b3c8d047f74379a7deb3b62371eeeaa726d5db Mon Sep 17 00:00:00 2001
From: Jarkko Hietaniemi
Date: Sat, 15 Jul 2000 14:11:02 +0000
Subject: [PATCH] Fix the bitvector ops for utf8 (tricky since past 7 bits the utf8 'characters' can be more than one octet).

Date: Sat, 15 Jul 2000 00:21:56 +0100
From: Tom Hughes
Subject: Re: [ID 20000714.002]
Message-ID:

Reported in

Subject: [ID 20000714.002]
From: "Simon Cozens"
Date: 14 Jul 2000 15:13:09 -0000
Message-Id: <20000714151309.7170.qmail@othersideofthe.earth.li>

p4raw-id: //depot/perl@6415
---
 doop.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/doop.c b/doop.c
index d8a0340..3cdc23d 100644
--- a/doop.c
+++ b/doop.c
@@ -903,6 +903,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
     char *rsave;
     bool left_utf = DO_UTF8(left);
     bool right_utf = DO_UTF8(right);
+    I32 needlen;
 
     if (left_utf && !right_utf)
         sv_utf8_upgrade(right);
@@ -915,17 +916,23 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
     rsave = rc = SvPV(right, rightlen);
     len = leftlen < rightlen ? leftlen : rightlen;
     lensave = len;
-    if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
+    if ((left_utf || right_utf) && (sv == left || sv == right)) {
+        needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
+        Newz(801, dc, needlen + 1, char);
+    }
+    else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
         STRLEN n_a;
         dc = SvPV_force(sv, n_a);
         if (SvCUR(sv) < len) {
             dc = SvGROW(sv, len + 1);
             (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
         }
+        if (optype != OP_BIT_AND && (left_utf || right_utf))
+            dc = SvGROW(sv, leftlen + rightlen + 1);
     }
     else {
-        I32 needlen = ((optype == OP_BIT_AND)
-                        ? len : (leftlen > rightlen ? leftlen : rightlen));
+        needlen = ((optype == OP_BIT_AND)
+                    ? len : (leftlen > rightlen ? leftlen : rightlen));
         Newz(801, dc, needlen + 1, char);
         (void)sv_usepvn(sv, dc, needlen);
         dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
@@ -934,14 +941,11 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
     (void)SvPOK_only(sv);
     if (left_utf || right_utf) {
         UV duc, luc, ruc;
+        char *dcsave = dc;
         STRLEN lulen = leftlen;
         STRLEN rulen = rightlen;
-        STRLEN dulen = 0;
         I32 ulen;
 
-        if (optype != OP_BIT_AND)
-            dc = SvGROW(sv, leftlen+rightlen+1);
-
         switch (optype) {
         case OP_BIT_AND:
             while (lulen && rulen) {
@@ -954,8 +958,9 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
                 duc = luc & ruc;
                 dc = (char*)uv_to_utf8((U8*)dc, duc);
             }
-            dulen = dc - SvPVX(sv);
-            SvCUR_set(sv, dulen);
+            if (sv == left || sv == right)
+                (void)sv_usepvn(sv, dcsave, needlen);
+            SvCUR_set(sv, dc - dcsave);
             break;
         case OP_BIT_XOR:
             while (lulen && rulen) {
@@ -981,8 +986,9 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
                 dc = (char*)uv_to_utf8((U8*)dc, duc);
             }
           mop_up_utf:
-            dulen = dc - SvPVX(sv);
-            SvCUR_set(sv, dulen);
+            if (sv == left || sv == right)
+                (void)sv_usepvn(sv, dcsave, needlen);
+            SvCUR_set(sv, dc - dcsave);
             if (rulen)
                 sv_catpvn(sv, rc, rulen);
             else if (lulen)
-- 
2.7.4