CODE:
pv = SvPV_nolen(sv);
+SV *
+HvENAME(HV *hv)
+CODE:
+ RETVAL = hv && HvENAME(hv)
+ ? newSVpvn_flags(
+ HvENAME(hv),HvENAMELEN(hv),
+ (HvENAMEUTF8(hv) ? SVf_UTF8 : 0)
+ )
+ : NULL;
+OUTPUT:
+ RETVAL
+
MODULE = XS::APItest PACKAGE = XS::APItest::Magic
is "@objs", "", 'freeing a hash with nulls frees all entries';
}
+# Tests for HvENAME and UTF8
+{
+ no strict;
+ no warnings 'void';
+ my $hvref;
+
+ *{"\xff::bar"}; # autovivify %ÿ:: without UTF8
+ *{"\xff::bαr::"} = $hvref = \%foo::;
+ undef *foo::;
+ is HvENAME($hvref), "\xff::bαr",
+ 'stash alias (utf8 inside bytes) does not create malformed UTF8';
+
+ *{"é::foo"}; # autovivify %é:: with UTF8
+ *{"\xe9::\xe9::"} = $hvref = \%bar::;
+ undef *bar::;
+ is HvENAME($hvref), "\xe9::\xe9",
+ 'stash alias (bytes inside utf8) does not create malformed UTF8';
+
+ *{"\xfe::bar"}; *{"\xfd::bar"};
+ *{"\xfe::bαr::"} = \%goo::;
+ *{"\xfd::bαr::"} = $hvref = \%goo::;
+ undef *goo::;
+ like HvENAME($hvref), qr/^[\xfe\xfd]::bαr\z/,
+ 'multiple stash aliases (utf8 inside bytes) do not cause bad UTF8';
+
+ *{"è::foo"}; *{"ë::foo"};
+ *{"\xe8::\xe9::"} = $hvref = \%bear::;
+ *{"\xeb::\xe9::"} = \%bear::;
+ undef *bear::;
+ like HvENAME($hvref), qr"^[\xe8\xeb]::\xe9\z",
+ 'multiple stash aliases (bytes inside utf8) do not cause bad UTF8';
+}
done_testing;
exit;
else sv_catpvs(namesv, "::");
}
if (GvNAMELEN(gv) != 1) {
- sv_catpvn(namesv, GvNAME(gv), GvNAMELEN(gv) - 2);
+ sv_catpvn_flags(
+ namesv, GvNAME(gv), GvNAMELEN(gv) - 2,
/* skip trailing :: */
- if ( GvNAMEUTF8(gv) )
- SvUTF8_on(namesv);
+ GvNAMEUTF8(gv) ? SV_CATUTF8 : SV_CATBYTES
+ );
}
}
else {
else sv_catpvs(aname, "::");
}
if (GvNAMELEN(gv) != 1) {
- sv_catpvn(aname, GvNAME(gv), GvNAMELEN(gv) - 2);
+ sv_catpvn_flags(
+ aname, GvNAME(gv), GvNAMELEN(gv) - 2,
/* skip trailing :: */
- if ( GvNAMEUTF8(gv) )
- SvUTF8_on(aname);
+ GvNAMEUTF8(gv) ? SV_CATUTF8 : SV_CATBYTES
+ );
}
av_push((AV *)namesv, aname);
}
sv_catpvs(aname, ":");
else {
sv_catpvs(aname, "::");
- sv_catpvn(aname, key, len-2);
- if ( SvUTF8(keysv) )
- SvUTF8_on(aname);
+ sv_catpvn_flags(
+ aname, key, len-2,
+ SvUTF8(keysv)
+ ? SV_CATUTF8 : SV_CATBYTES
+ );
}
av_push((AV *)subname, aname);
}
if (len == 1) sv_catpvs(subname, ":");
else {
sv_catpvs(subname, "::");
- sv_catpvn(subname, key, len-2);
- if ( SvUTF8(keysv) )
- SvUTF8_on(subname);
+ sv_catpvn_flags(
+ subname, key, len-2,
+ SvUTF8(keysv) ? SV_CATUTF8 : SV_CATBYTES
+ );
}
}
mro_gather_and_rename(
sv_catpvs(aname, ":");
else {
sv_catpvs(aname, "::");
- sv_catpvn(aname, key, len-2);
- if ( SvUTF8(keysv) )
- SvUTF8_on(aname);
+ sv_catpvn_flags(
+ aname, key, len-2,
+ SvUTF8(keysv)
+ ? SV_CATUTF8 : SV_CATBYTES
+ );
}
av_push((AV *)subname, aname);
}
if (len == 1) sv_catpvs(subname, ":");
else {
sv_catpvs(subname, "::");
- sv_catpvn(subname, key, len-2);
- if ( SvUTF8(keysv) )
- SvUTF8_on(subname);
+ sv_catpvn_flags(
+ subname, key, len-2,
+ SvUTF8(keysv) ? SV_CATUTF8 : SV_CATBYTES
+ );
}
}
mro_gather_and_rename(
const char * const dstr = SvPV_force_flags(dsv, dlen, flags);
PERL_ARGS_ASSERT_SV_CATPVN_FLAGS;
+ assert((flags & (SV_CATBYTES|SV_CATUTF8)) != (SV_CATBYTES|SV_CATUTF8));
- SvGROW(dsv, dlen + slen + 1);
- if (sstr == dstr)
+ if (!(flags & SV_CATBYTES) || !SvUTF8(dsv)) {
+ if (flags & SV_CATUTF8 && !SvUTF8(dsv)) {
+ sv_utf8_upgrade_flags_grow(dsv, 0, slen);
+ dlen = SvCUR(dsv);
+ }
+ else SvGROW(dsv, dlen + slen + 1);
+ if (sstr == dstr)
sstr = SvPVX_const(dsv);
- Move(sstr, SvPVX(dsv) + dlen, slen, char);
- SvCUR_set(dsv, SvCUR(dsv) + slen);
+ Move(sstr, SvPVX(dsv) + dlen, slen, char);
+ SvCUR_set(dsv, SvCUR(dsv) + slen);
+ }
+ else {
+ /* We inline bytes_to_utf8, to avoid an extra malloc. */
+ const char * const send = sstr + slen;
+ U8 *d;
+
+ /* Something this code does not account for, which I think is
+ impossible; it would require the same pv to be treated as
+ bytes *and* utf8, which would indicate a bug elsewhere. */
+ assert(sstr != dstr);
+
+ SvGROW(dsv, dlen + slen * 2);
+ d = (U8 *)SvPVX(dsv) + dlen;
+
+ while (sstr < send) {
+ const UV uv = NATIVE_TO_ASCII((U8)*sstr++);
+ if (UNI_IS_INVARIANT(uv))
+ *d++ = (U8)UTF_TO_NATIVE(uv);
+ else {
+ *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
+ *d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
+ }
+ }
+ SvCUR_set(dsv, d-(const U8 *)SvPVX(dsv));
+ }
*SvEND(dsv) = '\0';
(void)SvPOK_only_UTF8(dsv); /* validate pointer */
SvTAINT(dsv);
/* if (after resolving magic etc), the SV is found to be overloaded,
* don't call the overload magic, just return as-is */
#define SV_SKIP_OVERLOAD 8192
+/* It is not yet clear whether we want this as an API, or what the
+ * constants should be named. */
+#ifdef PERL_CORE
+# define SV_CATBYTES 16384
+# define SV_CATUTF8 32768
+#endif
/* The core is safe for this COW optimisation. XS code on CPAN may not be.
So only default to doing the COW setup if we're in the core.
=cut
*/
+/* This logic is duplicated in sv_catpvn_flags, so any bug fixes will
+ likewise need duplication. */
+
U8*
Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *len)
{