#define sv_pos_b2u Perl_sv_pos_b2u
#define sv_pvutf8n_force Perl_sv_pvutf8n_force
#define sv_pvbyten_force Perl_sv_pvbyten_force
+#define sv_recode_to_utf8 Perl_sv_recode_to_utf8
#define sv_reftype Perl_sv_reftype
#define sv_replace Perl_sv_replace
#define sv_report_used Perl_sv_report_used
#define sv_pos_b2u(a,b) Perl_sv_pos_b2u(aTHX_ a,b)
#define sv_pvutf8n_force(a,b) Perl_sv_pvutf8n_force(aTHX_ a,b)
#define sv_pvbyten_force(a,b) Perl_sv_pvbyten_force(aTHX_ a,b)
+#define sv_recode_to_utf8(a) Perl_sv_recode_to_utf8(aTHX_ a)
#define sv_reftype(a,b) Perl_sv_reftype(aTHX_ a,b)
#define sv_replace(a,b) Perl_sv_replace(aTHX_ a,b)
#define sv_report_used() Perl_sv_report_used(aTHX)
Amd |char* |sv_pvn_force |SV* sv|STRLEN* lp
Apd |char* |sv_pvutf8n_force|SV* sv|STRLEN* lp
Apd |char* |sv_pvbyten_force|SV* sv|STRLEN* lp
+Apd |void |sv_recode_to_utf8 |SV*
Apd |char* |sv_reftype |SV* sv|int ob
Apd |void |sv_replace |SV* sv|SV* nsv
Apd |void |sv_report_used
Perl_sv_pos_b2u
Perl_sv_pvutf8n_force
Perl_sv_pvbyten_force
+Perl_sv_recode_to_utf8
Perl_sv_reftype
Perl_sv_replace
Perl_sv_report_used
encoding pragma you can change this default.
The pragma is a per script, not a per block lexical. Only the last
-'use encoding' seen matters.
+C<use encoding> matters, and it affects B<the whole script>.
=head1 FUTURE POSSIBILITIES
-The C<\x..> and C<\0...> in literals and regular expressions are not
-affected by this pragma. They probably should. Ditto C<\N{...}>.
+The C<\x..> and C<\0...> in regular expressions are not
+affected by this pragma. They probably should.
+
+Also C<\N{...}> might become affected.
=head1 SEE ALSO
-print "1..3\n";
+print "1..5\n";
use encoding "latin1"; # ignored (overwritten by the next line)
use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
-$a = "\xDF";
-$b = "\x{100}";
-
-my $c = $a . $b;
-
# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
-print "not " unless ord($c) == 0x3af;
+$a = "\xDF";
+$b = "\x{100}";
+
+print "not " unless ord($a) == 0x3af;
print "ok 1\n";
-print "not " unless length($c) == 2;
+print "not " unless ord($b) == 0x100;
print "ok 2\n";
-print "not " unless ord(substr($c, 1, 1)) == 0x100;
+my $c;
+
+$c = $a . $b;
+
+print "not " unless ord($c) == 0x3af;
print "ok 3\n";
+print "not " unless length($c) == 2;
+print "ok 4\n";
+
+print "not " unless ord(substr($c, 1, 1)) == 0x100;
+print "ok 5\n";
=for hackers
Found in file sv.c
-=item newSV
-
-Create a new null SV, or if len > 0, create a new empty SVt_PV type SV
-with an initial PV allocation of len+1. Normally accessed via the C<NEWSV>
-macro.
-
- SV* newSV(STRLEN len)
-
-=for hackers
-Found in file sv.c
-
=item NEWSV
Creates a new SV. A non-zero C<len> parameter indicates the number of
=for hackers
Found in file handy.h
+=item newSV
+
+Create a new null SV, or if len > 0, create a new empty SVt_PV type SV
+with an initial PV allocation of len+1. Normally accessed via the C<NEWSV>
+macro.
+
+ SV* newSV(STRLEN len)
+
+=for hackers
+Found in file sv.c
+
=item newSViv
Creates a new SV and copies an integer into it. The reference count for the
=for hackers
Found in file sv.h
-=item SvIVX
+=item SvIVx
-Returns the raw value in the SV's IV slot, without checks or conversions.
-Only use when you are sure SvIOK is true. See also C<SvIV()>.
+Coerces the given SV to an integer and returns it. Guarantees to evaluate
+sv only once. Use the more efficent C<SvIV> otherwise.
- IV SvIVX(SV* sv)
+ IV SvIVx(SV* sv)
=for hackers
Found in file sv.h
-=item SvIVx
+=item SvIVX
-Coerces the given SV to an integer and returns it. Guarantees to evaluate
-sv only once. Use the more efficent C<SvIV> otherwise.
+Returns the raw value in the SV's IV slot, without checks or conversions.
+Only use when you are sure SvIOK is true. See also C<SvIV()>.
- IV SvIVx(SV* sv)
+ IV SvIVX(SV* sv)
=for hackers
Found in file sv.h
=for hackers
Found in file sv.h
-=item SvPVx
+=item SvPVX
-A version of C<SvPV> which guarantees to evaluate sv only once.
+Returns a pointer to the physical string in the SV. The SV must contain a
+string.
- char* SvPVx(SV* sv, STRLEN len)
+ char* SvPVX(SV* sv)
=for hackers
Found in file sv.h
-=item SvPVX
+=item SvPVx
-Returns a pointer to the physical string in the SV. The SV must contain a
-string.
+A version of C<SvPV> which guarantees to evaluate sv only once.
- char* SvPVX(SV* sv)
+ char* SvPVx(SV* sv, STRLEN len)
=for hackers
Found in file sv.h
=for hackers
Found in file sv.h
-=item SvTYPE
-
-Returns the type of the SV. See C<svtype>.
+=item svtype
- svtype SvTYPE(SV* sv)
+An enum of flags for Perl types. These are found in the file B<sv.h>
+in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
=for hackers
Found in file sv.h
-=item svtype
+=item SvTYPE
-An enum of flags for Perl types. These are found in the file B<sv.h>
-in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV. See C<svtype>.
+
+ svtype SvTYPE(SV* sv)
=for hackers
Found in file sv.h
=for hackers
Found in file sv.h
-=item SvUVX
+=item SvUVx
-Returns the raw value in the SV's UV slot, without checks or conversions.
-Only use when you are sure SvIOK is true. See also C<SvUV()>.
+Coerces the given SV to an unsigned integer and returns it. Guarantees to
+evaluate sv only once. Use the more efficent C<SvUV> otherwise.
- UV SvUVX(SV* sv)
+ UV SvUVx(SV* sv)
=for hackers
Found in file sv.h
-=item SvUVx
+=item SvUVX
-Coerces the given SV to an unsigned integer and returns it. Guarantees to
-evaluate sv only once. Use the more efficent C<SvUV> otherwise.
+Returns the raw value in the SV's UV slot, without checks or conversions.
+Only use when you are sure SvIOK is true. See also C<SvUV()>.
- UV SvUVx(SV* sv)
+ UV SvUVX(SV* sv)
=for hackers
Found in file sv.h
=for hackers
Found in file sv.c
+=item sv_recode_to_utf8
+
+If PL_encoding is set you can call this to recode the pv of the sv.
+The PL_encoding is assumed to be an Encode object, on entry the pv is assumed
+to be octets in that encoding, and the sv will be converted into Unicode
+(and UTF-8).
+
+If PL_encoding is not an Encode object, things will go boom.
+
+ void sv_recode_to_utf8(SV*)
+
+=for hackers
+Found in file sv.c
+
=item sv_reftype
Returns a string describing what the SV is a reference to.
/* PERL_CALLCONV char* sv_pvn_force(pTHX_ SV* sv, STRLEN* lp); */
PERL_CALLCONV char* Perl_sv_pvutf8n_force(pTHX_ SV* sv, STRLEN* lp);
PERL_CALLCONV char* Perl_sv_pvbyten_force(pTHX_ SV* sv, STRLEN* lp);
+PERL_CALLCONV void Perl_sv_recode_to_utf8(pTHX_ SV*);
PERL_CALLCONV char* Perl_sv_reftype(pTHX_ SV* sv, int ob);
PERL_CALLCONV void Perl_sv_replace(pTHX_ SV* sv, SV* nsv);
PERL_CALLCONV void Perl_sv_report_used(pTHX);
sv_force_normal(sv);
}
- if (PL_encoding) {
- SV *uni;
- STRLEN len;
- char *s;
- dSP;
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- EXTEND(SP, 3);
- XPUSHs(PL_encoding);
- XPUSHs(sv);
- XPUSHs(&PL_sv_yes);
- PUTBACK;
- call_method("decode", G_SCALAR);
- SPAGAIN;
- uni = POPs;
- PUTBACK;
- s = SvPVutf8(uni, len);
- if (s != SvPVX(sv)) {
- SvGROW(sv, len);
- Move(s, SvPVX(sv), len, char);
- SvCUR_set(sv, len);
- }
- FREETMPS;
- LEAVE;
- } else { /* Assume Latin-1/EBCDIC */
+ if (PL_encoding)
+ Perl_sv_recode_to_utf8(aTHX_ sv);
+ else { /* Assume Latin-1/EBCDIC */
/* This function could be much more efficient if we
* had a FLAG in SVs to signal if there are any hibit
* chars in the PV. Given that there isn't such a flag
Safefree(s); /* No longer using what was there before. */
SvLEN(sv) = len; /* No longer know the real size. */
}
+ /* Mark as UTF-8 even if no hibit - saves scanning loop */
+ SvUTF8_on(sv);
}
- /* Mark as UTF-8 even if no hibit - saves scanning loop */
- SvUTF8_on(sv);
return SvCUR(sv);
}
#endif /* USE_ITHREADS */
+/*
+=for apidoc sv_recode_to_utf8
+
+If PL_encoding is set you can call this to recode the pv of the sv.
+The PL_encoding is assumed to be an Encode object, on entry the pv is assumed
+to be octets in that encoding, and the sv will be converted into Unicode
+(and UTF-8).
+
+If PL_encoding is not an Encode object, things will go boom.
+
+=cut
+*/
+
+void
+Perl_sv_recode_to_utf8(pTHX_ SV *sv)
+{
+ SV *uni;
+ STRLEN len;
+ char *s;
+ dSP;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ EXTEND(SP, 3);
+ XPUSHs(PL_encoding);
+ XPUSHs(sv);
+ XPUSHs(&PL_sv_yes);
+ PUTBACK;
+ call_method("decode", G_SCALAR);
+ SPAGAIN;
+ uni = POPs;
+ PUTBACK;
+ s = SvPVutf8(uni, len);
+ if (s != SvPVX(sv)) {
+ SvGROW(sv, len);
+ Move(s, SvPVX(sv), len, char);
+ SvCUR_set(sv, len);
+ }
+ FREETMPS;
+ LEAVE;
+ SvUTF8_on(sv);
+}
+
Perl_croak(aTHX_ "panic: constant overflowed allocated space");
SvPOK_on(sv);
+ if (PL_encoding && !has_utf8) {
+ Perl_sv_recode_to_utf8(aTHX_ sv);
+ has_utf8 = TRUE;
+ }
if (has_utf8) {
SvUTF8_on(sv);
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
return count;
}
#endif
+