Perl_dump_indent(aTHX_ level, file, " DUP\n");
if (mg->mg_flags & MGf_LOCAL)
Perl_dump_indent(aTHX_ level, file, " LOCAL\n");
+ if (mg->mg_type == PERL_MAGIC_regex_global &&
+ mg->mg_flags & MGf_BYTES)
+ Perl_dump_indent(aTHX_ level, file, " BYTES\n");
}
if (mg->mg_obj) {
Perl_dump_indent(aTHX_ level, file, " MG_OBJ = 0x%"UVxf"\n",
Xop |bool |feature_is_enabled|NN const char *const name \
|STRLEN namelen
+: Some static inline functions need predeclaration because they are used
+: inside other static inline functions.
+Ei |STRLEN |sv_or_pv_pos_u2b|NN SV *sv|NN const char *pv|STRLEN pos \
+ |NULLOK STRLEN *lenp
+
: ex: set ts=8 sts=4 sw=4 noet:
#define regprop(a,b,c) Perl_regprop(aTHX_ a,b,c)
#define report_uninit(a) Perl_report_uninit(aTHX_ a)
#define sv_magicext_mglob(a) Perl_sv_magicext_mglob(aTHX_ a)
+#define sv_or_pv_pos_u2b(a,b,c,d) S_sv_or_pv_pos_u2b(aTHX_ a,b,c,d)
#define validate_proto(a,b,c) Perl_validate_proto(aTHX_ a,b,c)
#define vivify_defelem(a) Perl_vivify_defelem(aTHX_ a)
#define yylex() Perl_yylex(aTHX)
MAGIC = $ADDR
MG_VIRTUAL = &PL_vtbl_mglob
MG_TYPE = PERL_MAGIC_regex_global\\(g\\)
- MG_FLAGS = 0x01
- MINMATCH');
+ MG_FLAGS = 0x01 # $] < 5.019003
+ MG_FLAGS = 0x41 # $] >=5.019003
+ MINMATCH
+ BYTES # $] >=5.019003
+');
#
# TAINTEDDIR is not set on: OS2, AMIGAOS, WIN32, MSDOS
}
#endif
+/* ------------------------------- mg.h ------------------------------- */
+
+#if defined(PERL_CORE) || defined(PERL_EXT)
+/* assumes get-magic and stringification have already occurred */
+PERL_STATIC_INLINE STRLEN
+S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
+{
+ assert(mg->mg_type == PERL_MAGIC_regex_global);
+ assert(mg->mg_len != -1);
+ if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
+ return (STRLEN)mg->mg_len;
+ else {
+ const STRLEN pos = (STRLEN)mg->mg_len;
+ /* Without this check, we may read past the end of the buffer: */
+ if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
+ return sv_or_pv_pos_u2b(sv, s, pos, NULL);
+ }
+}
+#endif
+
/* ----------------------------- regexp.h ----------------------------- */
PERL_STATIC_INLINE struct regexp *
assert(SvFLAGS(sv) & SVs_PADMY);
return SvFLAGS(sv) &= ~SVs_PADSTALE;
}
-#ifdef PERL_CORE
+#if defined(PERL_CORE) || defined (PERL_EXT)
PERL_STATIC_INLINE STRLEN
S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
{
+ PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
if (SvGAMAGIC(sv)) {
U8 *hopped = utf8_hop((U8 *)pv, pos);
if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
if (found && found->mg_len != -1) {
STRLEN i = found->mg_len;
- if (DO_UTF8(lsv))
+ if (found->mg_flags & MGf_BYTES && DO_UTF8(lsv))
i = sv_pos_b2u_flags(lsv, i, SV_GMAGIC|SV_CONST_RETURN);
sv_setuv(sv, i);
return 0;
else if (pos > (SSize_t)len)
pos = len;
- if (ulen) {
- pos = sv_or_pv_pos_u2b(lsv, s, pos, 0);
- }
-
found->mg_len = pos;
- found->mg_flags &= ~MGf_MINMATCH;
+ found->mg_flags &= ~(MGf_MINMATCH|MGf_BYTES);
return 0;
}
#define MGf_COPY 8 /* has an svt_copy MGVTBL entry */
#define MGf_DUP 0x10 /* has an svt_dup MGVTBL entry */
#define MGf_LOCAL 0x20 /* has an svt_local MGVTBL entry */
+#define MGf_BYTES 0x40 /* PERL_MAGIC_regex_global only */
#define MgTAINTEDDIR(mg) (mg->mg_flags & MGf_TAINTEDDIR)
#define MgTAINTEDDIR_on(mg) (mg->mg_flags |= MGf_TAINTEDDIR)
#define SvTIED_obj(sv,mg) \
((mg)->mg_obj ? (mg)->mg_obj : sv_2mortal(newRV(sv)))
+#if defined(PERL_CORE) || defined(PERL_EXT)
+# define MgBYTEPOS(mg,sv,pv,len) S_MgBYTEPOS(aTHX_ mg,sv,pv,len)
+/* assumes get-magic and stringification have already occurred */
+# define MgBYTEPOS_set(mg,sv,pv,off) ( \
+ assert_((mg)->mg_type == PERL_MAGIC_regex_global) \
+ SvPOK(sv) && !SvGMAGICAL(sv) \
+ ? (mg)->mg_len = (off), (mg)->mg_flags |= MGf_BYTES \
+ : ((mg)->mg_len = DO_UTF8(sv) \
+ ? utf8_length((U8 *)(pv), (U8 *)(pv)+(off)) \
+ : (off), \
+ (mg)->mg_flags &= ~MGf_BYTES))
+#endif
+
#define whichsig(pv) whichsig_pv(pv)
/*
if (mg && mg->mg_len != -1) {
dTARGET;
STRLEN i = mg->mg_len;
- if (DO_UTF8(sv))
+ if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv))
i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN);
PUSHu(i);
RETURN;
if (!(mg = mg_find_mglob(sv))) {
mg = sv_magicext_mglob(sv);
}
- mg->mg_len = m - orig;
+ assert(SvPOK(dstr));
+ MgBYTEPOS_set(mg, sv, SvPVX(dstr), m - orig);
}
if (old != rx)
(void)ReREFCNT_inc(rx);
if (global) {
mg = mg_find_mglob(TARG);
if (mg && mg->mg_len >= 0) {
- curpos = mg->mg_len;
+ curpos = MgBYTEPOS(mg, TARG, truebase, len);
/* last time pos() was set, it was zero-length match */
if (mg->mg_flags & MGf_MINMATCH)
had_zerolen = 1;
if (global && (gimme != G_ARRAY || (dynpm->op_pmflags & PMf_CONTINUE))) {
if (!mg)
mg = sv_magicext_mglob(TARG);
- mg->mg_len = RX_OFFS(rx)[0].end;
+ MgBYTEPOS_set(mg, TARG, truebase, RX_OFFS(rx)[0].end);
if (RX_ZERO_LEN(rx))
mg->mg_flags |= MGf_MINMATCH;
else
#define PERL_ARGS_ASSERT_SV_NV \
assert(sv)
+PERL_STATIC_INLINE STRLEN S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B \
+ assert(sv); assert(pv)
+
PERL_CALLCONV char* Perl_sv_peek(pTHX_ SV* sv);
PERL_CALLCONV void Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
__attribute__nonnull__(pTHX_2);
(flags & REXEC_IGNOREPOS)
? stringarg /* use start pos rather than pos() */
: (sv && (mg = mg_find_mglob(sv)) && mg->mg_len >= 0)
- ? strbeg + mg->mg_len /* Defined pos() */
+ /* Defined pos(): */
+ ? strbeg + MgBYTEPOS(mg, sv, strbeg, strend-strbeg)
: strbeg; /* pos() not defined; use start of string */
DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
rex->offs[0].end = locinput - reginfo->strbeg;
if (reginfo->info_aux_eval->pos_magic)
- reginfo->info_aux_eval->pos_magic->mg_len
- = locinput - reginfo->strbeg;
+ MgBYTEPOS_set(reginfo->info_aux_eval->pos_magic,
+ reginfo->sv, reginfo->strbeg,
+ locinput - reginfo->strbeg);
if (sv_yes_mark) {
SV *sv_mrk = get_sv("REGMARK", 1);
}
eval_state->pos_magic = mg;
eval_state->pos = mg->mg_len;
+ eval_state->pos_flags = mg->mg_flags;
}
else
eval_state->pos_magic = NULL;
RXp_MATCH_COPIED_on(rex);
}
if (eval_state->pos_magic)
+ {
eval_state->pos_magic->mg_len = eval_state->pos;
+ eval_state->pos_magic->mg_flags =
+ (eval_state->pos_magic->mg_flags & ~MGf_BYTES)
+ | (eval_state->pos_flags & MGf_BYTES);
+ }
PL_curpm = eval_state->curpm;
}
STRLEN subcoffset; /* saved subcoffset field from rex */
MAGIC *pos_magic; /* pos() magic attached to $_ */
I32 pos; /* the original value of pos() in pos_magic */
+ U8 pos_flags; /* flags to be restored; currently only MGf_BYTES*/
} regmatch_info_aux_eval;
if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
/* Update pos. We do it at the end rather than during
* the upgrade, to avoid slowing down the common case
- * (upgrade without pos) */
+ * (upgrade without pos).
+ * pos can be stored as either bytes or characters. Since
+ * this was previously a byte string we can just turn off
+ * the bytes flag. */
MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
if (mg) {
- I32 pos = mg->mg_len;
- if (pos > 0 && (U32)pos > invariant_head) {
- U8 *d = (U8*) SvPVX(sv) + invariant_head;
- STRLEN n = (U32)pos - invariant_head;
- while (n > 0) {
- if (UTF8_IS_START(*d))
- d++;
- d++;
- n--;
- }
- mg->mg_len = d - (U8*)SvPVX(sv);
- }
+ mg->mg_flags &= ~MGf_BYTES;
}
if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
magic_setutf8(sv,mg); /* clear UTF8 cache */
if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
/* update pos */
MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
- if (mg) {
- I32 pos = mg->mg_len;
- if (pos > 0) {
- sv_pos_b2u(sv, &pos);
+ if (mg && mg->mg_len > 0 && mg->mg_flags & MGf_BYTES) {
+ mg->mg_len = sv_pos_b2u_flags(sv, mg->mg_len,
+ SV_GMAGIC|SV_CONST_RETURN);
mg_flags = 0; /* sv_pos_b2u does get magic */
- mg->mg_len = pos;
- }
}
if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
magic_setutf8(sv,mg); /* clear UTF8 cache */
}
}
if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
+ /* XXX Is this dead code? XS_utf8_decode calls SvSETMAGIC
+ after this, clearing pos. Does anything on CPAN
+ need this? */
/* adjust pos to the start of a UTF8 char sequence */
MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
if (mg) {
}
}
+ /* Force pos to be stored as characters, not bytes. */
+ if (SvMAGICAL(sv) && DO_UTF8(sv)
+ && (mg = mg_find(sv, PERL_MAGIC_regex_global))
+ && mg->mg_len != -1
+ && mg->mg_flags & MGf_BYTES) {
+ mg->mg_len = (SSize_t)sv_pos_b2u_flags(sv, (STRLEN)mg->mg_len,
+ SV_CONST_RETURN);
+ mg->mg_flags &= ~MGf_BYTES;
+ }
+
/* Rest of work is done else where */
mg = sv_magicext(sv,obj,how,vtable,name,namlen);
#define sv_catpvn_nomg_maybeutf8(dsv, sstr, slen, is_utf8) \
sv_catpvn_flags(dsv, sstr, slen, (is_utf8)?SV_CATUTF8:SV_CATBYTES)
-#ifdef PERL_CORE
+#if defined(PERL_CORE) || defined(PERL_EXT)
# define sv_or_pv_len_utf8(sv, pv, bytelen) \
(SvGAMAGIC(sv) \
? utf8_length((U8 *)(pv), (U8 *)(pv)+(bytelen)) \
: sv_len_utf8(sv))
-# define sv_or_pv_pos_u2b(sv,s,p,lp) S_sv_or_pv_pos_u2b(aTHX_ sv,s,p,lp)
#endif
/*
require './test.pl';
}
-plan tests => 22;
+plan tests => 28;
$x='banana';
$x=~/.a/g;
pos $h{n} = 1;
ok $_[3] =~ /\Ge/, '\G works with defelem scalars';
}->($h{k}, $h{l}, $h{m}, $h{n});
+
+$x = bless [], chr 256;
+pos $x=1;
+bless $x, a;
+is pos($x), 1, 'pos is not affected by reference stringification changing';
+{
+ my $w;
+ local $SIG{__WARN__} = sub { $w .= shift };
+ $x = bless [], chr 256;
+ pos $x=1;
+ bless $x, "\x{1000}";
+ is pos $x, 1,
+ 'pos unchanged after increasing size of chars in stringification';
+ is $w, undef, 'and no malformed utf8 warning';
+}
+$x = bless [], chr 256;
+$x =~ /.(?{
+ bless $x, a;
+ is pos($x), 1, 'pos unaffected by ref str changing (in re-eval)';
+})/;
+{
+ my $w;
+ local $SIG{__WARN__} = sub { $w .= shift };
+ $x = bless [], chr(256);
+ $x =~ /.(?{
+ bless $x, "\x{1000}";
+ is pos $x, 1,
+ 'pos unchanged in re-eval after increasing size of chars in str';
+ })/;
+ is $w, undef, 'and no malformed utf8 warning';
+}