From d931b1bedc59d4625c59047dfda2b1bd83ff9f71 Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Sun, 14 Feb 2010 12:31:44 +0000 Subject: [PATCH] Convert Perl_sv_pos_u2b_proper() to Perl_sv_pos_u2b_flags(). Change from a value/return offset pointer to passing a Unicode offset, and returning a byte offset. The optional length value/return pointer remains. Add a flags argument, passed to SvPV_flags(). This allows the caller to specify whether mg_get() should be called on sv. --- embed.fnc | 3 ++- embed.h | 4 ++-- global.sym | 2 +- mg.c | 6 +++--- pp.c | 6 +++--- proto.h | 8 ++++---- sv.c | 52 +++++++++++++++++++++++++++------------------------- 7 files changed, 42 insertions(+), 39 deletions(-) diff --git a/embed.fnc b/embed.fnc index 7e450aa..769481b 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1165,7 +1165,8 @@ ApdR |SV* |sv_newmortal Apd |SV* |sv_newref |NULLOK SV *const sv Ap |char* |sv_peek |NULLOK SV* sv Apd |void |sv_pos_u2b |NULLOK SV *const sv|NN I32 *const offsetp|NULLOK I32 *const lenp -Apd |void |sv_pos_u2b_proper|NULLOK SV *const sv|NN STRLEN *const offsetp|NULLOK STRLEN *const lenp +Apd |STRLEN |sv_pos_u2b_flags|NN SV *const sv|STRLEN uoffset \ + |NULLOK STRLEN *const lenp|U32 flags Apd |void |sv_pos_b2u |NULLOK SV *const sv|NN I32 *const offsetp Amdb |char* |sv_pvn_force |NN SV* sv|NULLOK STRLEN* lp Apd |char* |sv_pvutf8n_force|NN SV *const sv|NULLOK STRLEN *const lp diff --git a/embed.h b/embed.h index 1281fcc..eda36a7 100644 --- a/embed.h +++ b/embed.h @@ -967,7 +967,7 @@ #define sv_newref Perl_sv_newref #define sv_peek Perl_sv_peek #define sv_pos_u2b Perl_sv_pos_u2b -#define sv_pos_u2b_proper Perl_sv_pos_u2b_proper +#define sv_pos_u2b_flags Perl_sv_pos_u2b_flags #define sv_pos_b2u Perl_sv_pos_b2u #define sv_pvutf8n_force Perl_sv_pvutf8n_force #define sv_pvbyten_force Perl_sv_pvbyten_force @@ -3372,7 +3372,7 @@ #define sv_newref(a) Perl_sv_newref(aTHX_ a) #define sv_peek(a) Perl_sv_peek(aTHX_ a) #define sv_pos_u2b(a,b,c) Perl_sv_pos_u2b(aTHX_ a,b,c) -#define sv_pos_u2b_proper(a,b,c) Perl_sv_pos_u2b_proper(aTHX_ a,b,c) +#define sv_pos_u2b_flags(a,b,c,d) Perl_sv_pos_u2b_flags(aTHX_ a,b,c,d) #define sv_pos_b2u(a,b) Perl_sv_pos_b2u(aTHX_ a,b) #define sv_pvutf8n_force(a,b) Perl_sv_pvutf8n_force(aTHX_ a,b) #define sv_pvbyten_force(a,b) Perl_sv_pvbyten_force(aTHX_ a,b) diff --git a/global.sym b/global.sym index f0e462e..a99548e 100644 --- a/global.sym +++ b/global.sym @@ -567,7 +567,7 @@ Perl_sv_newmortal Perl_sv_newref Perl_sv_peek Perl_sv_pos_u2b -Perl_sv_pos_u2b_proper +Perl_sv_pos_u2b_flags Perl_sv_pos_b2u Perl_sv_pvn_force Perl_sv_pvutf8n_force diff --git a/mg.c b/mg.c index 3f51a17..cc01547 100644 --- a/mg.c +++ b/mg.c @@ -2015,7 +2015,7 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg) PERL_UNUSED_ARG(mg); if (SvUTF8(lsv)) - sv_pos_u2b_proper(lsv, &offs, &rem); + offs = sv_pos_u2b_flags(lsv, offs, &rem, SV_CONST_RETURN); if (offs > len) offs = len; if (rem > len - offs) @@ -2041,14 +2041,14 @@ Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg) if (DO_UTF8(sv)) { sv_utf8_upgrade(lsv); - sv_pos_u2b_proper(lsv, &lvoff, &lvlen); + lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN); sv_insert(lsv, lvoff, lvlen, tmps, len); LvTARGLEN(sv) = sv_len_utf8(sv); SvUTF8_on(lsv); } else if (lsv && SvUTF8(lsv)) { const char *utf8; - sv_pos_u2b_proper(lsv, &lvoff, &lvlen); + lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN); LvTARGLEN(sv) = len; utf8 = (char*)bytes_to_utf8((U8*)tmps, &len); sv_insert(lsv, lvoff, lvlen, utf8, len); diff --git a/pp.c b/pp.c index 8f01e85..3e2ed48 100644 --- a/pp.c +++ b/pp.c @@ -3204,10 +3204,10 @@ PP(pp_substr) /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */ const STRLEN pos = (STRLEN)( (UV)pos1_iv ); const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv ); - STRLEN byte_pos = pos; STRLEN byte_len = len; - if (utf8_curlen) - sv_pos_u2b_proper(sv, &byte_pos, &byte_len); + STRLEN byte_pos = utf8_curlen + ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos; + tmps += byte_pos; /* we either return a PV or an LV. If the TARG hasn't been used * before, or is of that type, reuse it; otherwise use a mortal diff --git a/proto.h b/proto.h index ae48597..01be297 100644 --- a/proto.h +++ b/proto.h @@ -3374,10 +3374,10 @@ PERL_CALLCONV void Perl_sv_pos_u2b(pTHX_ SV *const sv, I32 *const offsetp, I32 * #define PERL_ARGS_ASSERT_SV_POS_U2B \ assert(offsetp) -PERL_CALLCONV void Perl_sv_pos_u2b_proper(pTHX_ SV *const sv, STRLEN *const offsetp, STRLEN *const lenp) - __attribute__nonnull__(pTHX_2); -#define PERL_ARGS_ASSERT_SV_POS_U2B_PROPER \ - assert(offsetp) +PERL_CALLCONV STRLEN Perl_sv_pos_u2b_flags(pTHX_ SV *const sv, STRLEN uoffset, STRLEN *const lenp, U32 flags) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_SV_POS_U2B_FLAGS \ + assert(sv) PERL_CALLCONV void Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp) __attribute__nonnull__(pTHX_2); diff --git a/sv.c b/sv.c index 02be580..57231ec 100644 --- a/sv.c +++ b/sv.c @@ -6240,44 +6240,40 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start /* -=for apidoc sv_pos_u2b_proper +=for apidoc sv_pos_u2b_flags Converts the value pointed to by offsetp from a count of UTF-8 chars from the start of the string, to a count of the equivalent number of bytes; if lenp is non-zero, it does the same to lenp, but this time starting from -the offset, rather than from the start of the string. Handles magic and -type coercion. +the offset, rather than from the start of the string. Handles type coercion. +I is passed to C, and usually should be +C to handle magic. =cut */ /* - * sv_pos_u2b_proper() uses, like sv_pos_b2u(), the mg_ptr of the potential + * sv_pos_u2b_flags() uses, like sv_pos_b2u(), the mg_ptr of the potential * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and * byte offsets. See also the comments of S_utf8_mg_pos_cache_update(). * */ -void -Perl_sv_pos_u2b_proper(pTHX_ register SV *const sv, STRLEN *const offsetp, STRLEN *const lenp) +STRLEN +Perl_sv_pos_u2b_flags(pTHX_ SV *const sv, STRLEN uoffset, STRLEN *const lenp, + U32 flags) { const U8 *start; STRLEN len; + STRLEN boffset; - PERL_ARGS_ASSERT_SV_POS_U2B; + PERL_ARGS_ASSERT_SV_POS_U2B_FLAGS; - if (!sv) - return; - - start = (U8*)SvPV_const(sv, len); + start = (U8*)SvPV_flags(sv, len, flags); if (len) { - STRLEN uoffset = *offsetp; const U8 * const send = start + len; MAGIC *mg = NULL; - const STRLEN boffset = sv_pos_u2b_cached(sv, &mg, start, send, - uoffset, 0, 0); - - *offsetp = boffset; + boffset = sv_pos_u2b_cached(sv, &mg, start, send, uoffset, 0, 0); if (lenp) { /* Convert the relative offset to absolute. */ @@ -6288,14 +6284,13 @@ Perl_sv_pos_u2b_proper(pTHX_ register SV *const sv, STRLEN *const offsetp, STRLE *lenp = boffset2; } - } - else { - *offsetp = 0; - if (lenp) - *lenp = 0; + } else { + if (lenp) + *lenp = 0; + boffset = 0; } - return; + return boffset; } /* @@ -6307,6 +6302,9 @@ lenp is non-zero, it does the same to lenp, but this time starting from the offset, rather than from the start of the string. Handles magic and type coercion. +Use C in preference, which correctly handles strings longer +than 2Gb. + =cut */ @@ -6323,14 +6321,18 @@ void Perl_sv_pos_u2b(pTHX_ register SV *const sv, I32 *const offsetp, I32 *const lenp) { STRLEN uoffset = (STRLEN)*offsetp; + + PERL_ARGS_ASSERT_SV_POS_U2B; + if (lenp) { STRLEN ulen = (STRLEN)*lenp; - sv_pos_u2b_proper(sv, &uoffset, &ulen); + *offsetp = (I32)sv_pos_u2b_flags(sv, (STRLEN)*offsetp, &ulen, + SV_GMAGIC|SV_CONST_RETURN); *lenp = (I32)ulen; } else { - sv_pos_u2b_proper(sv, &uoffset, NULL); + *offsetp = (I32)sv_pos_u2b_flags(sv, (STRLEN)*offsetp, NULL, + SV_GMAGIC|SV_CONST_RETURN); } - *offsetp = (I32)uoffset; } /* Create and update the UTF8 magic offset cache, with the proffered utf8/ -- 2.7.4