From: DongHun Kwak Date: Wed, 28 Jun 2017 01:51:39 +0000 (+0900) Subject: Imported Upstream version 5.25.10 X-Git-Tag: upstream/5.25.11~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=refs%2Fchanges%2F51%2F136051%2F1;p=platform%2Fupstream%2Fperl.git Imported Upstream version 5.25.10 Change-Id: I07a263a82cdb7997cb862547ad3fcf510bfa9780 Signed-off-by: DongHun Kwak --- diff --git a/AUTHORS b/AUTHORS index e5eff79..4e4756b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -259,6 +259,7 @@ Clinton A. Pierce Colin Kuskie Colin McMillen Colin Meyer +Colin Newell Colin Watson Conrad Augustin Conrad E. Kimball @@ -929,6 +930,7 @@ Ollivier Robert Osvaldo Villalon Owain G. Ainsworth Owen Taylor +Pali Papp Zoltan parv Pascal Rigaux diff --git a/Cross/config.sh-arm-linux b/Cross/config.sh-arm-linux index c2647f0..d299ee4 100644 --- a/Cross/config.sh-arm-linux +++ b/Cross/config.sh-arm-linux @@ -32,12 +32,12 @@ alignbytes='4' ansi2knr='' aphostname='/bin/hostname' api_revision='5' -api_subversion='9' +api_subversion='10' api_version='25' -api_versionstring='5.25.9' +api_versionstring='5.25.10' ar='ar' -archlib='/usr/lib/perl5/5.25.9/armv4l-linux' -archlibexp='/usr/lib/perl5/5.25.9/armv4l-linux' +archlib='/usr/lib/perl5/5.25.10/armv4l-linux' +archlibexp='/usr/lib/perl5/5.25.10/armv4l-linux' archname64='' archname='armv4l-linux' archobjs='' @@ -56,7 +56,7 @@ castflags='0' cat='cat' cc='cc' cccdlflags='-fpic' -ccdlflags='-rdynamic -Wl,-rpath,/usr/lib/perl5/5.25.9/armv4l-linux/CORE' +ccdlflags='-rdynamic -Wl,-rpath,/usr/lib/perl5/5.25.10/armv4l-linux/CORE' ccflags='-fno-strict-aliasing -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64' ccflags_uselargefiles='-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64' ccname='arm-linux-gcc' @@ -826,7 +826,7 @@ inc_version_list=' ' inc_version_list_init='0' incpath='' inews='' -installarchlib='./install_me_here/usr/lib/perl5/5.25.9/armv4l-linux' +installarchlib='./install_me_here/usr/lib/perl5/5.25.10/armv4l-linux' installbin='./install_me_here/usr/bin' installhtml1dir='' installhtml3dir='' @@ -834,13 +834,13 @@ installman1dir='./install_me_here/usr/share/man/man1' installman3dir='./install_me_here/usr/share/man/man3' installprefix='./install_me_here/usr' installprefixexp='./install_me_here/usr' -installprivlib='./install_me_here/usr/lib/perl5/5.25.9' +installprivlib='./install_me_here/usr/lib/perl5/5.25.10' installscript='./install_me_here/usr/bin' -installsitearch='./install_me_here/usr/lib/perl5/site_perl/5.25.9/armv4l-linux' +installsitearch='./install_me_here/usr/lib/perl5/site_perl/5.25.10/armv4l-linux' installsitebin='./install_me_here/usr/bin' installsitehtml1dir='' installsitehtml3dir='' -installsitelib='./install_me_here/usr/lib/perl5/site_perl/5.25.9' +installsitelib='./install_me_here/usr/lib/perl5/site_perl/5.25.10' installsiteman1dir='./install_me_here/usr/share/man/man1' installsiteman3dir='./install_me_here/usr/share/man/man3' installsitescript='./install_me_here/usr/bin' @@ -974,8 +974,8 @@ pmake='' pr='' prefix='/usr' prefixexp='/usr' -privlib='/usr/lib/perl5/5.25.9' -privlibexp='/usr/lib/perl5/5.25.9' +privlib='/usr/lib/perl5/5.25.10' +privlibexp='/usr/lib/perl5/5.25.10' procselfexe='"/proc/self/exe"' prototype='define' ptrsize='4' @@ -1040,17 +1040,17 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 6, 17, 29, 31, 0' sig_size='68' signal_t='void' -sitearch='/usr/lib/perl5/site_perl/5.25.9/armv4l-linux' -sitearchexp='/usr/lib/perl5/site_perl/5.25.9/armv4l-linux' +sitearch='/usr/lib/perl5/site_perl/5.25.10/armv4l-linux' +sitearchexp='/usr/lib/perl5/site_perl/5.25.10/armv4l-linux' sitebin='/usr/bin' sitebinexp='/usr/bin' sitehtml1dir='' sitehtml1direxp='' sitehtml3dir='' sitehtml3direxp='' -sitelib='/usr/lib/perl5/site_perl/5.25.9' +sitelib='/usr/lib/perl5/site_perl/5.25.10' sitelib_stem='/usr/lib/perl5/site_perl' -sitelibexp='/usr/lib/perl5/site_perl/5.25.9' +sitelibexp='/usr/lib/perl5/site_perl/5.25.10' siteman1dir='/usr/share/man/man1' siteman1direxp='/usr/share/man/man1' siteman3dir='/usr/share/man/man3' @@ -1089,7 +1089,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/usr/include/string.h' submit='' -subversion='9' +subversion='10' sysman='/usr/share/man/man1' tail='' tar='' @@ -1181,8 +1181,8 @@ vendorprefix='' vendorprefixexp='' vendorscript='' vendorscriptexp='' -version='5.25.9' -version_patchlevel_string='version 25 subversion 9' +version='5.25.10' +version_patchlevel_string='version 25 subversion 10' versiononly='undef' vi='' xlibpth='/usr/lib/386 /lib/386' @@ -1196,9 +1196,9 @@ config_args='' config_argc=0 PERL_REVISION=5 PERL_VERSION=25 -PERL_SUBVERSION=9 +PERL_SUBVERSION=10 PERL_API_REVISION=5 PERL_API_VERSION=25 -PERL_API_SUBVERSION=9 +PERL_API_SUBVERSION=10 PERL_PATCHLEVEL= PERL_CONFIG_SH=true diff --git a/Cross/config.sh-arm-linux-n770 b/Cross/config.sh-arm-linux-n770 index 05d51b7..a031546 100644 --- a/Cross/config.sh-arm-linux-n770 +++ b/Cross/config.sh-arm-linux-n770 @@ -32,12 +32,12 @@ alignbytes='4' ansi2knr='' aphostname='/bin/hostname' api_revision='5' -api_subversion='9' +api_subversion='10' api_version='25' -api_versionstring='5.25.9' +api_versionstring='5.25.10' ar='ar' -archlib='/usr/lib/perl5/5.25.9/armv4l-linux' -archlibexp='/usr/lib/perl5/5.25.9/armv4l-linux' +archlib='/usr/lib/perl5/5.25.10/armv4l-linux' +archlibexp='/usr/lib/perl5/5.25.10/armv4l-linux' archname64='' archname='armv4l-linux' archobjs='' @@ -55,7 +55,7 @@ castflags='0' cat='cat' cc='arm-none-linux-gnueabi-gcc' cccdlflags='-fpic' -ccdlflags='-rdynamic -Wl,-rpath,/usr/lib/perl5/5.25.9/armv4l-linux/CORE' +ccdlflags='-rdynamic -Wl,-rpath,/usr/lib/perl5/5.25.10/armv4l-linux/CORE' ccflags='-fno-strict-aliasing -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64' ccflags_uselargefiles='-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64' ccname='arm-linux-gcc' @@ -699,7 +699,7 @@ inc_version_list=' ' inc_version_list_init='0' incpath='' inews='' -installarchlib='./install_me_here/usr/lib/perl5/5.25.9/armv4l-linux' +installarchlib='./install_me_here/usr/lib/perl5/5.25.10/armv4l-linux' installbin='./install_me_here/usr/bin' installhtml1dir='' installhtml3dir='' @@ -707,13 +707,13 @@ installman1dir='./install_me_here/usr/share/man/man1' installman3dir='./install_me_here/usr/share/man/man3' installprefix='./install_me_here/usr' installprefixexp='./install_me_here/usr' -installprivlib='./install_me_here/usr/lib/perl5/5.25.9' +installprivlib='./install_me_here/usr/lib/perl5/5.25.10' installscript='./install_me_here/usr/bin' -installsitearch='./install_me_here/usr/lib/perl5/site_perl/5.25.9/armv4l-linux' +installsitearch='./install_me_here/usr/lib/perl5/site_perl/5.25.10/armv4l-linux' installsitebin='./install_me_here/usr/bin' installsitehtml1dir='' installsitehtml3dir='' -installsitelib='./install_me_here/usr/lib/perl5/site_perl/5.25.9' +installsitelib='./install_me_here/usr/lib/perl5/site_perl/5.25.10' installsiteman1dir='./install_me_here/usr/share/man/man1' installsiteman3dir='./install_me_here/usr/share/man/man3' installsitescript='./install_me_here/usr/bin' @@ -841,8 +841,8 @@ pmake='' pr='' prefix='/usr' prefixexp='/usr' -privlib='/usr/lib/perl5/5.25.9' -privlibexp='/usr/lib/perl5/5.25.9' +privlib='/usr/lib/perl5/5.25.10' +privlibexp='/usr/lib/perl5/5.25.10' procselfexe='"/proc/self/exe"' prototype='define' ptrsize='4' @@ -903,17 +903,17 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 6, 17, 29, 31, 0' sig_size='68' signal_t='void' -sitearch='/usr/lib/perl5/site_perl/5.25.9/armv4l-linux' -sitearchexp='/usr/lib/perl5/site_perl/5.25.9/armv4l-linux' +sitearch='/usr/lib/perl5/site_perl/5.25.10/armv4l-linux' +sitearchexp='/usr/lib/perl5/site_perl/5.25.10/armv4l-linux' sitebin='/usr/bin' sitebinexp='/usr/bin' sitehtml1dir='' sitehtml1direxp='' sitehtml3dir='' sitehtml3direxp='' -sitelib='/usr/lib/perl5/site_perl/5.25.9' +sitelib='/usr/lib/perl5/site_perl/5.25.10' sitelib_stem='/usr/lib/perl5/site_perl' -sitelibexp='/usr/lib/perl5/site_perl/5.25.9' +sitelibexp='/usr/lib/perl5/site_perl/5.25.10' siteman1dir='/usr/share/man/man1' siteman1direxp='/usr/share/man/man1' siteman3dir='/usr/share/man/man3' @@ -950,7 +950,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/usr/include/string.h' submit='' -subversion='9' +subversion='10' sysman='/usr/share/man/man1' tail='' tar='' @@ -1035,8 +1035,8 @@ vendorprefix='' vendorprefixexp='' vendorscript='' vendorscriptexp='' -version='5.25.9' -version_patchlevel_string='version 25 subversion 9' +version='5.25.10' +version_patchlevel_string='version 25 subversion 10' versiononly='undef' vi='' xlibpth='/usr/lib/386 /lib/386' @@ -1050,9 +1050,9 @@ config_args='' config_argc=0 PERL_REVISION=5 PERL_VERSION=25 -PERL_SUBVERSION=9 +PERL_SUBVERSION=10 PERL_API_REVISION=5 PERL_API_VERSION=25 -PERL_API_SUBVERSION=9 +PERL_API_SUBVERSION=10 PERL_PATCHLEVEL= PERL_CONFIG_SH=true diff --git a/INSTALL b/INSTALL index 8fcc58f..69b862d 100644 --- a/INSTALL +++ b/INSTALL @@ -588,7 +588,7 @@ The directories set up by Configure fall into three broad categories. =item Directories for the perl distribution -By default, Configure will use the following directories for 5.25.9. +By default, Configure will use the following directories for 5.25.10. $version is the full perl version number, including subversion, e.g. 5.12.3, and $archname is a string like sun4-sunos, determined by Configure. The full definitions of all Configure @@ -2441,7 +2441,7 @@ http://www.chiark.greenend.org.uk/~sgtatham/bugs.html =head1 Coexistence with earlier versions of perl 5 -Perl 5.25.9 is not binary compatible with earlier versions of Perl. +Perl 5.25.10 is not binary compatible with earlier versions of Perl. In other words, you will have to recompile your XS modules. In general, you can usually safely upgrade from one version of Perl @@ -2516,9 +2516,9 @@ won't interfere with another version. (The defaults guarantee this for libraries after 5.6.0, but not for executables. TODO?) One convenient way to do this is by using a separate prefix for each version, such as - sh Configure -Dprefix=/opt/perl5.25.9 + sh Configure -Dprefix=/opt/perl5.25.10 -and adding /opt/perl5.25.9/bin to the shell PATH variable. Such users +and adding /opt/perl5.25.10/bin to the shell PATH variable. Such users may also wish to add a symbolic link /usr/local/bin/perl so that scripts can still start with #!/usr/local/bin/perl. @@ -2531,13 +2531,13 @@ seriously consider using a separate directory, since development subversions may not have all the compatibility wrinkles ironed out yet. -=head2 Upgrading from 5.25.2 or earlier +=head2 Upgrading from 5.25.9 or earlier -B Perl modules having binary parts (meaning that a C compiler is used) will have to be recompiled to be -used with 5.25.9. If you find you do need to rebuild an extension with -5.25.9, you may safely do so without disturbing the older +used with 5.25.10. If you find you do need to rebuild an extension with +5.25.10, you may safely do so without disturbing the older installations. (See L<"Coexistence with earlier versions of perl 5"> above.) @@ -2570,15 +2570,15 @@ Firstly, the bare minimum to run this script print("$f\n"); } -in Linux with perl-5.25.9 is as follows (under $Config{prefix}): +in Linux with perl-5.25.10 is as follows (under $Config{prefix}): ./bin/perl - ./lib/perl5/5.25.9/strict.pm - ./lib/perl5/5.25.9/warnings.pm - ./lib/perl5/5.25.9/i686-linux/File/Glob.pm - ./lib/perl5/5.25.9/feature.pm - ./lib/perl5/5.25.9/XSLoader.pm - ./lib/perl5/5.25.9/i686-linux/auto/File/Glob/Glob.so + ./lib/perl5/5.25.10/strict.pm + ./lib/perl5/5.25.10/warnings.pm + ./lib/perl5/5.25.10/i686-linux/File/Glob.pm + ./lib/perl5/5.25.10/feature.pm + ./lib/perl5/5.25.10/XSLoader.pm + ./lib/perl5/5.25.10/i686-linux/auto/File/Glob/Glob.so Secondly, for perl-5.10.1, the Debian perl-base package contains 591 files, (of which 510 are for lib/unicore) totaling about 3.5MB in its diff --git a/MANIFEST b/MANIFEST index 4875774..bf36939 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4228,7 +4228,17 @@ ext/XS-APItest/t/gv_fetchmeth.t XS::APItest: tests for gv_fetchmeth() and varia ext/XS-APItest/t/gv_fetchmeth_autoload.t XS::APItest: tests for gv_fetchmeth_autoload() and variants ext/XS-APItest/t/gv_fetchmethod_flags.t XS::APItest: tests for gv_fetchmethod_flags() and variants ext/XS-APItest/t/gv_init.t XS::APItest: tests for gv_init and variants -ext/XS-APItest/t/handy.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy0.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy1.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy2.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy3.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy4.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy5.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy6.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy7.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy8.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy9.t XS::APItest: tests for handy.h +ext/XS-APItest/t/handy_base.pl XS::APItest: tests for handy.h ext/XS-APItest/t/hash.t XS::APItest: tests for hash related APIs ext/XS-APItest/t/join_with_space.t test op_convert_list ext/XS-APItest/t/keyword_multiline.t test keyword plugin parsing across lines @@ -4289,6 +4299,19 @@ ext/XS-APItest/t/temp_lv_sub.t XS::APItest: tests for lvalue subs returning temp ext/XS-APItest/t/underscore_length.t Test find_rundefsv() ext/XS-APItest/t/utf16_to_utf8.t Test behaviour of utf16_to_utf8{,reversed} ext/XS-APItest/t/utf8.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_malformed.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_setup.pl Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn0.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn1.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn2.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn3.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn4.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn5.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn6.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn7.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn8.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn9.t Tests for code in utf8.c +ext/XS-APItest/t/utf8_warn_base.pl Tests for code in utf8.c ext/XS-APItest/t/weaken.t XS::APItest: tests for sv_rvweaken() and sv_get_backrefs() ext/XS-APItest/t/whichsig.t XS::APItest: tests for whichsig() and variants ext/XS-APItest/t/win32.t Test Win32 specific APIs @@ -4915,6 +4938,7 @@ pod/perl5255delta.pod Perl changes in version 5.25.5 pod/perl5256delta.pod Perl changes in version 5.25.6 pod/perl5257delta.pod Perl changes in version 5.25.7 pod/perl5258delta.pod Perl changes in version 5.25.8 +pod/perl5259delta.pod Perl changes in version 5.25.9 pod/perl561delta.pod Perl changes in version 5.6.1 pod/perl56delta.pod Perl changes in version 5.6 pod/perl581delta.pod Perl changes in version 5.8.1 @@ -5332,6 +5356,7 @@ t/lib/croak/pp_ctl Test croak calls from pp_ctl.c t/lib/croak/pp_hot Test croak calls from pp_hot.c t/lib/croak/pp_sys Test croak calls from pp_sys.c t/lib/croak/toke Test croak calls from toke.c +t/lib/croak/toke_l1 Test croak calls from toke.c; file is not UTF-8 encoded t/lib/cygwin.t Builtin cygwin function tests t/lib/dbmt_common.pl Common functionality for ?DBM_File tests t/lib/deprecate.t Test deprecate.pm diff --git a/META.json b/META.json index 41aa72a..0dbd6ee 100644 --- a/META.json +++ b/META.json @@ -126,6 +126,6 @@ "url" : "http://perl5.git.perl.org/" } }, - "version" : "5.025009", + "version" : "5.025010", "x_serialization_backend" : "JSON::PP version 2.27400_02" } diff --git a/META.yml b/META.yml index 6753b00..a652b8a 100644 --- a/META.yml +++ b/META.yml @@ -113,5 +113,5 @@ resources: homepage: http://www.perl.org/ license: http://dev.perl.org/licenses/ repository: http://perl5.git.perl.org/ -version: '5.025009' +version: '5.025010' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' diff --git a/Makefile.SH b/Makefile.SH index 2cb0010..4fa9ef0 100755 --- a/Makefile.SH +++ b/Makefile.SH @@ -569,7 +569,7 @@ esac $spitshell >>$Makefile <<'!NO!SUBS!' -perltoc_pod_prereqs = extra.pods pod/perl5259delta.pod pod/perlapi.pod pod/perlintern.pod pod/perlmodlib.pod pod/perluniprops.pod +perltoc_pod_prereqs = extra.pods pod/perl52510delta.pod pod/perlapi.pod pod/perlintern.pod pod/perlmodlib.pod pod/perluniprops.pod generated_pods = pod/perltoc.pod $(perltoc_pod_prereqs) generated_headers = uudmap.h bitcount.h mg_data.h @@ -1119,9 +1119,9 @@ pod/perlintern.pod: $(MINIPERL_EXE) autodoc.pl embed.fnc pod/perlmodlib.pod: $(MINIPERL_EXE) pod/perlmodlib.PL MANIFEST $(MINIPERL) pod/perlmodlib.PL -q -pod/perl5259delta.pod: pod/perldelta.pod - $(RMS) pod/perl5259delta.pod - $(LNS) perldelta.pod pod/perl5259delta.pod +pod/perl52510delta.pod: pod/perldelta.pod + $(RMS) pod/perl52510delta.pod + $(LNS) perldelta.pod pod/perl52510delta.pod extra.pods: $(MINIPERL_EXE) -@test ! -f extra.pods || rm -f `cat extra.pods` diff --git a/NetWare/Makefile b/NetWare/Makefile index 948bc94..9ebb389 100644 --- a/NetWare/Makefile +++ b/NetWare/Makefile @@ -86,7 +86,7 @@ NLM_VERSION = 3,20,0 # Here comes the CW tools - TO BE FILLED TO BUILD WITH CW - -MODULE_DESC = "Perl 5.25.9 for NetWare" +MODULE_DESC = "Perl 5.25.10 for NetWare" CCTYPE = CodeWarrior C_COMPILER = mwccnlm -c CPP_COMPILER = mwccnlm @@ -462,7 +462,7 @@ INST_NW_TOP2 = $(INST_NW_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -INST_VER = \5.25.9 +INST_VER = \5.25.10 # # Comment this out if you DON'T want your perl installation to have diff --git a/NetWare/config_H.wc b/NetWare/config_H.wc index 8c02556..fcbe9c6 100644 --- a/NetWare/config_H.wc +++ b/NetWare/config_H.wc @@ -1042,7 +1042,7 @@ * This symbol contains the ~name expanded version of ARCHLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define ARCHLIB "c:\\perl\\5.25.9\\lib\\NetWare-x86-multi-thread" /**/ +#define ARCHLIB "c:\\perl\\5.25.10\\lib\\NetWare-x86-multi-thread" /**/ /*#define ARCHLIB_EXP "" /**/ /* ARCHNAME: @@ -1073,8 +1073,8 @@ * This symbol is the filename expanded version of the BIN symbol, for * programs that do not want to deal with that at run-time. */ -#define BIN "c:\\perl\\5.25.9\\bin\\NetWare-x86-multi-thread" /**/ -#define BIN_EXP "c:\\perl\\5.25.9\\bin\\NetWare-x86-multi-thread" /**/ +#define BIN "c:\\perl\\5.25.10\\bin\\NetWare-x86-multi-thread" /**/ +#define BIN_EXP "c:\\perl\\5.25.10\\bin\\NetWare-x86-multi-thread" /**/ /* BYTEORDER: * This symbol holds the hexadecimal constant defined in byteorder, @@ -3088,7 +3088,7 @@ * This symbol contains the ~name expanded version of SITEARCH, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define SITEARCH "c:\\perl\\site\\5.25.9\\lib\\NetWare-x86-multi-thread" /**/ +#define SITEARCH "c:\\perl\\site\\5.25.10\\lib\\NetWare-x86-multi-thread" /**/ /*#define SITEARCH_EXP "" /**/ /* SITELIB: @@ -3111,7 +3111,7 @@ * removed. The elements in inc_version_list (inc_version_list.U) can * be tacked onto this variable to generate a list of directories to search. */ -#define SITELIB "c:\\perl\\site\\5.25.9\\lib" /**/ +#define SITELIB "c:\\perl\\site\\5.25.10\\lib" /**/ /*#define SITELIB_EXP "" /**/ #define SITELIB_STEM "" /**/ diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index 5e94e19..eed9a44 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -251,7 +251,7 @@ use File::Glob qw(:case); }, 'CPAN' => { - 'DISTRIBUTION' => 'ANDK/CPAN-2.16-TRIAL2.tar.gz', + 'DISTRIBUTION' => 'ANDK/CPAN-2.17-TRIAL2.tar.gz', 'FILES' => q[cpan/CPAN], 'EXCLUDED' => [ qr{^distroprefs/}, @@ -837,7 +837,7 @@ use File::Glob qw(:case); }, 'Module::CoreList' => { - 'DISTRIBUTION' => 'BINGOS/Module-CoreList-5.20161220.tar.gz', + 'DISTRIBUTION' => 'BINGOS/Module-CoreList-5.20170120.tar.gz', 'FILES' => q[dist/Module-CoreList], }, @@ -1011,6 +1011,7 @@ use File::Glob qw(:case); 'lib/Sub/Util.pm', # CPAN RT 118470 't/lln.t', # CPAN RT 118470 't/uniq.t', # CPAN RT 118470 + 't/tainted.t', # CPAN RT 119169 ], }, @@ -1219,7 +1220,7 @@ use File::Glob qw(:case); # correct for this (and Thread::Semaphore, threads, and threads::shared) # to be under dist/ rather than cpan/ 'Thread::Queue' => { - 'DISTRIBUTION' => 'JDHEDDEN/Thread-Queue-3.11.tar.gz', + 'DISTRIBUTION' => 'JDHEDDEN/Thread-Queue-3.12.tar.gz', 'FILES' => q[dist/Thread-Queue], 'EXCLUDED' => [ qr{^examples/}, diff --git a/Porting/checkAUTHORS.pl b/Porting/checkAUTHORS.pl index 56fb39e..cb8863c 100755 --- a/Porting/checkAUTHORS.pl +++ b/Porting/checkAUTHORS.pl @@ -833,6 +833,7 @@ module@renee-baecker.de renee.baecker\100smart-websolutions.de + github@renee-baecker.de + otrs\100ubuntu.(none) + perl\100renee-baecker.de ++ reb\100perl-services.de richard.foley\100rfi.net richard.foley\100t-online.de + richard.foley\100ubs.com + richard.foley\100ubsw.com diff --git a/Porting/config.sh b/Porting/config.sh index fbeca17..f5a18e3 100644 --- a/Porting/config.sh +++ b/Porting/config.sh @@ -39,12 +39,12 @@ alignbytes='8' ansi2knr='' aphostname='/bin/hostname' api_revision='5' -api_subversion='9' +api_subversion='10' api_version='25' -api_versionstring='5.25.9' +api_versionstring='5.25.10' ar='ar' -archlib='/tmp/mblead/lib/perl5/5.25.9/darwin-2level' -archlibexp='/tmp/mblead/lib/perl5/5.25.9/darwin-2level' +archlib='/tmp/mblead/lib/perl5/5.25.10/darwin-2level' +archlibexp='/tmp/mblead/lib/perl5/5.25.10/darwin-2level' archname64='' archname='darwin-2level' archobjs='' @@ -846,7 +846,7 @@ incpath='' incpth='/usr/local/include /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/6.0/include /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include /usr/include /usr/local/include /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/6.0/include /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include /usr/include /usr/local/include /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/6.0/include /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include /usr/include' inews='' initialinstalllocation='/tmp/mblead/bin' -installarchlib='/tmp/mblead/lib/perl5/5.25.9/darwin-2level' +installarchlib='/tmp/mblead/lib/perl5/5.25.10/darwin-2level' installbin='/tmp/mblead/bin' installhtml1dir='' installhtml3dir='' @@ -854,13 +854,13 @@ installman1dir='/tmp/mblead/man/man1' installman3dir='/tmp/mblead/man/man3' installprefix='/tmp/mblead' installprefixexp='/tmp/mblead' -installprivlib='/tmp/mblead/lib/perl5/5.25.9' +installprivlib='/tmp/mblead/lib/perl5/5.25.10' installscript='/tmp/mblead/bin' -installsitearch='/tmp/mblead/lib/perl5/site_perl/5.25.9/darwin-2level' +installsitearch='/tmp/mblead/lib/perl5/site_perl/5.25.10/darwin-2level' installsitebin='/tmp/mblead/bin' installsitehtml1dir='' installsitehtml3dir='' -installsitelib='/tmp/mblead/lib/perl5/site_perl/5.25.9' +installsitelib='/tmp/mblead/lib/perl5/site_perl/5.25.10' installsiteman1dir='/tmp/mblead/man/man1' installsiteman3dir='/tmp/mblead/man/man3' installsitescript='/tmp/mblead/bin' @@ -985,7 +985,7 @@ perl_patchlevel='' perl_static_inline='static __inline__' perladmin='aaron@daybreak.nonet' perllibs='-lpthread -ldl -lm -lutil -lc' -perlpath='/tmp/mblead/bin/perl5.25.9' +perlpath='/tmp/mblead/bin/perl5.25.10' pg='pg' phostname='hostname' pidtype='pid_t' @@ -994,8 +994,8 @@ pmake='' pr='' prefix='/tmp/mblead' prefixexp='/tmp/mblead' -privlib='/tmp/mblead/lib/perl5/5.25.9' -privlibexp='/tmp/mblead/lib/perl5/5.25.9' +privlib='/tmp/mblead/lib/perl5/5.25.10' +privlibexp='/tmp/mblead/lib/perl5/5.25.10' procselfexe='' prototype='define' ptrsize='8' @@ -1061,17 +1061,17 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 6, 0' sig_size='33' signal_t='void' -sitearch='/tmp/mblead/lib/perl5/site_perl/5.25.9/darwin-2level' -sitearchexp='/tmp/mblead/lib/perl5/site_perl/5.25.9/darwin-2level' +sitearch='/tmp/mblead/lib/perl5/site_perl/5.25.10/darwin-2level' +sitearchexp='/tmp/mblead/lib/perl5/site_perl/5.25.10/darwin-2level' sitebin='/tmp/mblead/bin' sitebinexp='/tmp/mblead/bin' sitehtml1dir='' sitehtml1direxp='' sitehtml3dir='' sitehtml3direxp='' -sitelib='/tmp/mblead/lib/perl5/site_perl/5.25.9' +sitelib='/tmp/mblead/lib/perl5/site_perl/5.25.10' sitelib_stem='/tmp/mblead/lib/perl5/site_perl' -sitelibexp='/tmp/mblead/lib/perl5/site_perl/5.25.9' +sitelibexp='/tmp/mblead/lib/perl5/site_perl/5.25.10' siteman1dir='/tmp/mblead/man/man1' siteman1direxp='/tmp/mblead/man/man1' siteman3dir='/tmp/mblead/man/man3' @@ -1097,7 +1097,7 @@ src='.' ssizetype='ssize_t' st_ino_sign='1' st_ino_size='8' -startperl='#!/tmp/mblead/bin/perl5.25.9' +startperl='#!/tmp/mblead/bin/perl5.25.10' startsh='#!/bin/sh' static_ext=' ' stdchar='char' @@ -1110,7 +1110,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/usr/include/string.h' submit='' -subversion='9' +subversion='10' sysman='/usr/share/man/man1' sysroot='' tail='' @@ -1209,8 +1209,8 @@ vendorprefix='' vendorprefixexp='' vendorscript='' vendorscriptexp='' -version='5.25.9' -version_patchlevel_string='version 25 subversion 9' +version='5.25.10' +version_patchlevel_string='version 25 subversion 10' versiononly='define' vi='' xlibpth='/usr/lib/386 /lib/386' @@ -1220,9 +1220,9 @@ zcat='' zip='zip' PERL_REVISION=5 PERL_VERSION=25 -PERL_SUBVERSION=9 +PERL_SUBVERSION=10 PERL_API_REVISION=5 PERL_API_VERSION=25 -PERL_API_SUBVERSION=9 +PERL_API_SUBVERSION=10 PERL_PATCHLEVEL='' PERL_CONFIG_SH=true diff --git a/Porting/config_H b/Porting/config_H index 54b5b9d..46da2dd 100644 --- a/Porting/config_H +++ b/Porting/config_H @@ -960,8 +960,8 @@ * This symbol contains the ~name expanded version of ARCHLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define ARCHLIB "/pro/lib/perl5/5.25.9/i686-linux-64int-ld" /**/ -#define ARCHLIB_EXP "/pro/lib/perl5/5.25.9/i686-linux-64int-ld" /**/ +#define ARCHLIB "/pro/lib/perl5/5.25.10/i686-linux-64int-ld" /**/ +#define ARCHLIB_EXP "/pro/lib/perl5/5.25.10/i686-linux-64int-ld" /**/ /* ARCHNAME: * This symbol holds a string representing the architecture name. @@ -2068,8 +2068,8 @@ * This symbol contains the ~name expanded version of PRIVLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define PRIVLIB "/pro/lib/perl5/5.25.9" /**/ -#define PRIVLIB_EXP "/pro/lib/perl5/5.25.9" /**/ +#define PRIVLIB "/pro/lib/perl5/5.25.10" /**/ +#define PRIVLIB_EXP "/pro/lib/perl5/5.25.10" /**/ /* PTRSIZE: * This symbol contains the size of a pointer, so that the C preprocessor @@ -2119,8 +2119,8 @@ * This symbol contains the ~name expanded version of SITEARCH, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define SITEARCH "/pro/lib/perl5/site_perl/5.25.9/i686-linux-64int-ld" /**/ -#define SITEARCH_EXP "/pro/lib/perl5/site_perl/5.25.9/i686-linux-64int-ld" /**/ +#define SITEARCH "/pro/lib/perl5/site_perl/5.25.10/i686-linux-64int-ld" /**/ +#define SITEARCH_EXP "/pro/lib/perl5/site_perl/5.25.10/i686-linux-64int-ld" /**/ /* SITELIB: * This symbol contains the name of the private library for this package. @@ -2142,8 +2142,8 @@ * removed. The elements in inc_version_list (inc_version_list.U) can * be tacked onto this variable to generate a list of directories to search. */ -#define SITELIB "/pro/lib/perl5/site_perl/5.25.9" /**/ -#define SITELIB_EXP "/pro/lib/perl5/site_perl/5.25.9" /**/ +#define SITELIB "/pro/lib/perl5/site_perl/5.25.10" /**/ +#define SITELIB_EXP "/pro/lib/perl5/site_perl/5.25.10" /**/ #define SITELIB_STEM "/pro/lib/perl5/site_perl" /**/ /* SSize_t: @@ -4282,7 +4282,7 @@ * script to make sure (one hopes) that it runs with perl and not * some shell. */ -#define STARTPERL "#!/pro/bin/perl5.25.9" /**/ +#define STARTPERL "#!/pro/bin/perl5.25.10" /**/ /* HAS_STDIO_STREAM_ARRAY: * This symbol, if defined, tells that there is an array diff --git a/Porting/epigraphs.pod b/Porting/epigraphs.pod index 28d3740..1e2c717 100644 --- a/Porting/epigraphs.pod +++ b/Porting/epigraphs.pod @@ -17,6 +17,17 @@ Consult your favorite dictionary for details. =head1 EPIGRAPHS +=head2 v5.25.9 - A. A. Milne, "Winnie-the-Pooh", 1926 + +L + + Pooh always liked a little something at eleven o'clock in the + morning, and he was very glad to see Rabbit getting out the plates + and mugs; and when Rabbit said, "Honey or condensed milk with + your bread?" he was so excited that he said, "Both," and then, + so as not to seem greedy, he added, "But don't bother about the + bread, please." + =head2 v5.25.8 - Langston Hughes, So long L diff --git a/Porting/release_schedule.pod b/Porting/release_schedule.pod index 7e74c6f..3bb606d 100644 --- a/Porting/release_schedule.pod +++ b/Porting/release_schedule.pod @@ -16,7 +16,8 @@ deemed necessary by the Pumpking. Code freezes (which happen in the 5.25.X series) 2016-12-20 5.25.8 Contentious changes freeze - 2017-01-20 5.25.9 User-visible changes freeze + 2017-01-20 5.25.9 User-visible changes to correctly + functioning programs freeze 2017-02-20 5.25.10 Full code freeze 2017-04-20 5.26.0 Stable release! @@ -59,7 +60,7 @@ you should reset the version numbers to the next blead series. 2016-10-20 5.25.6 ✓ Aaron Crane 2016-11-20 5.25.7 ✓ Chad Granum 2016-12-20 5.25.8 ✓ Sawyer X - 2017-01-20 5.25.9 Abigail + 2017-01-20 5.25.9 ✓ Abigail 2017-02-20 5.25.10 Renée Bäcker (RC0 for 5.26.0 will be released once we think that all the blockers have been diff --git a/Porting/todo.pod b/Porting/todo.pod index f8ae842..94e7480 100644 --- a/Porting/todo.pod +++ b/Porting/todo.pod @@ -485,7 +485,7 @@ Natively 64-bit systems need neither -Duse64bitint nor -Duse64bitall. On these systems, it might be the default compilation mode, and there is currently no guarantee that passing no use64bitall option to the Configure process will build a 32bit perl. Implementing -Duse32bit* -options would be nice for perl 5.25.9. +options would be nice for perl 5.25.10. =head2 Profile Perl - am I hot or not? @@ -1205,7 +1205,7 @@ L =head1 Big projects Tasks that will get your name mentioned in the description of the "Highlights -of 5.25.9" +of 5.25.10" =head2 make ithreads more robust diff --git a/README.haiku b/README.haiku index f914965..1a01d0f 100644 --- a/README.haiku +++ b/README.haiku @@ -22,9 +22,9 @@ The build procedure is completely standard: Make perl executable and create a symlink for libperl: chmod a+x /boot/common/bin/perl - cd /boot/common/lib; ln -s perl5/5.25.9/BePC-haiku/CORE/libperl.so . + cd /boot/common/lib; ln -s perl5/5.25.10/BePC-haiku/CORE/libperl.so . -Replace C<5.25.9> with your respective version of Perl. +Replace C<5.25.10> with your respective version of Perl. =head1 KNOWN PROBLEMS diff --git a/README.macosx b/README.macosx index 6d93d15..70cbf74 100644 --- a/README.macosx +++ b/README.macosx @@ -10,9 +10,9 @@ perlmacosx - Perl under Mac OS X This document briefly describes Perl under Mac OS X. - curl -O http://www.cpan.org/src/perl-5.25.9.tar.gz - tar -xzf perl-5.25.9.tar.gz - cd perl-5.25.9 + curl -O http://www.cpan.org/src/perl-5.25.10.tar.gz + tar -xzf perl-5.25.10.tar.gz + cd perl-5.25.10 ./Configure -des -Dprefix=/usr/local/ make make test @@ -20,7 +20,7 @@ This document briefly describes Perl under Mac OS X. =head1 DESCRIPTION -The latest Perl release (5.25.9 as of this writing) builds without changes +The latest Perl release (5.25.10 as of this writing) builds without changes under all versions of Mac OS X from 10.3 "Panther" onwards. In order to build your own version of Perl you will need 'make', diff --git a/README.os2 b/README.os2 index 475dcc4..940b804 100644 --- a/README.os2 +++ b/README.os2 @@ -619,7 +619,7 @@ C in F, see L">. =item Additional Perl modules - unzip perl_ste.zip -d f:/perllib/lib/site_perl/5.25.9/ + unzip perl_ste.zip -d f:/perllib/lib/site_perl/5.25.10/ Same remark as above applies. Additionally, if this directory is not one of directories on @INC (and @INC is influenced by C), you diff --git a/README.vms b/README.vms index d9218ad..efe93a2 100644 --- a/README.vms +++ b/README.vms @@ -142,11 +142,11 @@ You may need to set up a foreign symbol for the unpacking utility of choice. Once you have done so, use a command like the following to unpack the archive: - vmstar -xvf perl-5^.25^.9.tar + vmstar -xvf perl-5^.25^.10.tar Then set default to the top-level source directory like so: - set default [.perl-5^.25^.9] + set default [.perl-5^.25^.10] and proceed with configuration as described in the next section. diff --git a/README.win32 b/README.win32 index 1b68250..0e891fc 100644 --- a/README.win32 +++ b/README.win32 @@ -63,10 +63,10 @@ that are also supported by perl's makefile. =back The Microsoft Visual C++ compilers are also now being given away free. They are -available as "Visual C++ Toolkit 2003" or "Visual C++ 2005-2013 Express +available as "Visual C++ Toolkit 2003" or "Visual C++ 2005-2015 Express Edition" (and also as part of the ".NET Framework SDK") and are the same compilers that ship with "Visual C++ .NET 2003 Professional" or "Visual C++ -2005-2013 Professional" respectively. +2005-2015 Professional" respectively. This port can also be built on IA64/AMD64 using: @@ -139,9 +139,9 @@ console already set up for your target architecture (x86-32 or x86-64 or IA64). With the newer compilers, you may also use the older batch files if you choose so. -=item Microsoft Visual C++ 2008-2013 Express Edition +=item Microsoft Visual C++ 2008-2015 Express Edition -These free versions of Visual C++ 2008-2013 Professional contain the same +These free versions of Visual C++ 2008-2015 Professional contain the same compilers and linkers that ship with the full versions, and also contain everything necessary to build Perl, rather than requiring a separate download of the Windows SDK like previous versions did. @@ -151,14 +151,14 @@ L. (Providing ex links to these packages has proven a pointless task because the links keep on changing so often.) -Install Visual C++ 2008-2013 Express, then setup your environment using, e.g. +Install Visual C++ 2008-2015 Express, then setup your environment using, e.g. C:\Program Files\Microsoft Visual Studio 12.0\Common7\Tools\vsvars32.bat (assuming the default installation location was chosen). Perl should now build using the win32/Makefile. You will need to edit that -file to set CCTYPE to one of MSVC90FREE-MSVC120FREE first. +file to set CCTYPE to one of MSVC90FREE-MSVC140FREE first. =item Microsoft Visual C++ 2005 Express Edition @@ -421,8 +421,8 @@ There should be no test failures. If you build with Visual C++ 2013 then three tests currently may fail with Daylight Saving Time related problems: F, F and F. The failures are -caused by bugs in the CRT in VC++ 2013 which will be fixed in future releases -of VC++, as explained by Microsoft here: +caused by bugs in the CRT in VC++ 2013 which are fixed in VC++2015 and +later, as explained by Microsoft here: L. In the meantime, if you need fixed C and C functions then have a look at the CPAN distribution Win32::UTCFileTime. @@ -950,6 +950,6 @@ Win9x support was added in 5.6 (Benjamin Stuhl). Support for 64-bit Windows added in 5.8 (ActiveState Corp). -Last updated: 07 October 2014 +Last updated: 19 February 2017 =cut diff --git a/av.h b/av.h index d6d2137..97ce2bc 100644 --- a/av.h +++ b/av.h @@ -81,14 +81,12 @@ Same as C. ? mg_size(MUTABLE_SV(av)) : AvFILLp(av)) #define av_tindex(av) av_top_index(av) -#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) /* Note that it doesn't make sense to do this: * SvGETMAGIC(av); IV x = av_tindex_nomg(av); - * This name is controversial, and so is restricted by the #ifdef to the places - * it already occurs */ -# define av_tindex_nomg(av) (__ASSERT_(SvTYPE(av) == SVt_PVAV) AvFILLp(av)) -#endif +# define av_top_index_skip_len_mg(av) \ + (__ASSERT_(SvTYPE(av) == SVt_PVAV) AvFILLp(av)) +# define av_tindex_skip_len_mg(av) av_top_index_skip_len_mg(av) #define NEGATIVE_INDICES_VAR "NEGATIVE_INDICES" diff --git a/charclass_invlists.h b/charclass_invlists.h index 732b6d0..7b5b7ea 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -95407,7 +95407,7 @@ static const U8 WB_table[24][24] = { * 37f6186253da9824bdb27f4ad867bfe8c25d4dc6bdb2f05585e40a034675a348 lib/unicore/extracted/DLineBreak.txt * ef24061b5a5dc93d7e90c2e34530ec757180ee75d872cba65ffc946e52624ae8 lib/unicore/extracted/DNumType.txt * a197371fec9a1b517058b440841f60f9378d81682084eef8db22a88cb2f96e90 lib/unicore/extracted/DNumValues.txt - * 4bcfb4545be21663ca38a2acbfcbf2b0f3252652a34b50f1a56ef76cb959861b lib/unicore/mktables + * 79a7216aceb1d291f2857085545fdda289518bc540a09bc0a15cde105d76028d lib/unicore/mktables * cdecb300baad839a6f62791229f551a4fa33f3cbdca08e378dc976466354e778 lib/unicore/version * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl * 9534d0cc3914fa1f5d574332c3199605c3d14f8691a0729d68d8498ac2b36280 regen/mk_invlists.pl diff --git a/cpan/CPAN/lib/CPAN.pm b/cpan/CPAN/lib/CPAN.pm index 1fba5c1..185ade9 100644 --- a/cpan/CPAN/lib/CPAN.pm +++ b/cpan/CPAN/lib/CPAN.pm @@ -2,7 +2,7 @@ # vim: ts=4 sts=4 sw=4: use strict; package CPAN; -$CPAN::VERSION = '2.16'; +$CPAN::VERSION = '2.17'; $CPAN::VERSION =~ s/_//; # we need to run chdir all over and we would get at wrong libraries diff --git a/cpan/CPAN/lib/CPAN/Distribution.pm b/cpan/CPAN/lib/CPAN/Distribution.pm index b5744fd..fc7ce10 100644 --- a/cpan/CPAN/lib/CPAN/Distribution.pm +++ b/cpan/CPAN/lib/CPAN/Distribution.pm @@ -8,7 +8,7 @@ use CPAN::InfoObj; use File::Path (); @CPAN::Distribution::ISA = qw(CPAN::InfoObj); use vars qw($VERSION); -$VERSION = "2.16"; +$VERSION = "2.17"; # no prepare, because prepare is not a command on the shell command line # TODO: clear instance cache on reload @@ -377,6 +377,7 @@ sub get { ? $ENV{PERL5LIB} : ($ENV{PERLLIB} || ""); local $ENV{PERL5OPT} = defined $ENV{PERL5OPT} ? $ENV{PERL5OPT} : ""; + # local $ENV{PERL_USE_UNSAFE_INC} = exists $ENV{PERL_USE_UNSAFE_INC} ? $ENV{PERL_USE_UNSAFE_INC} : 1; # get $CPAN::META->set_perl5lib; local $ENV{MAKEFLAGS}; # protect us from outer make calls @@ -1302,6 +1303,7 @@ Could not determine which directory to use for looking at $dist. : ($ENV{PERLLIB} || ""); local $ENV{PERL5OPT} = defined $ENV{PERL5OPT} ? $ENV{PERL5OPT} : ""; + # local $ENV{PERL_USE_UNSAFE_INC} = exists $ENV{PERL_USE_UNSAFE_INC} ? $ENV{PERL_USE_UNSAFE_INC} : 1; # look $CPAN::META->set_perl5lib; local $ENV{MAKEFLAGS}; # protect us from outer make calls @@ -1821,6 +1823,7 @@ sub prepare { ? $ENV{PERL5LIB} : ($ENV{PERLLIB} || ""); local $ENV{PERL5OPT} = defined $ENV{PERL5OPT} ? $ENV{PERL5OPT} : ""; + local $ENV{PERL_USE_UNSAFE_INC} = exists $ENV{PERL_USE_UNSAFE_INC} ? $ENV{PERL_USE_UNSAFE_INC} : 1; # prepare $CPAN::META->set_perl5lib; local $ENV{MAKEFLAGS}; # protect us from outer make calls @@ -2115,6 +2118,7 @@ is part of the perl-%s distribution. To install that, you need to run ? $ENV{PERL5LIB} : ($ENV{PERLLIB} || ""); local $ENV{PERL5OPT} = defined $ENV{PERL5OPT} ? $ENV{PERL5OPT} : ""; + local $ENV{PERL_USE_UNSAFE_INC} = exists $ENV{PERL_USE_UNSAFE_INC} ? $ENV{PERL_USE_UNSAFE_INC} : 1; # make $CPAN::META->set_perl5lib; local $ENV{MAKEFLAGS}; # protect us from outer make calls @@ -3482,6 +3486,7 @@ sub test { : ($ENV{PERLLIB} || ""); local $ENV{PERL5OPT} = defined $ENV{PERL5OPT} ? $ENV{PERL5OPT} : ""; + local $ENV{PERL_USE_UNSAFE_INC} = exists $ENV{PERL_USE_UNSAFE_INC} ? $ENV{PERL_USE_UNSAFE_INC} : 1; # test $CPAN::META->set_perl5lib; local $ENV{MAKEFLAGS}; # protect us from outer make calls local $ENV{PERL_MM_USE_DEFAULT} = 1 if $CPAN::Config->{use_prompt_default}; @@ -3955,6 +3960,7 @@ sub install { : ($ENV{PERLLIB} || ""); local $ENV{PERL5OPT} = defined $ENV{PERL5OPT} ? $ENV{PERL5OPT} : ""; + # local $ENV{PERL_USE_UNSAFE_INC} = exists $ENV{PERL_USE_UNSAFE_INC} ? $ENV{PERL_USE_UNSAFE_INC} : 1; # install $CPAN::META->set_perl5lib; local $ENV{PERL_MM_USE_DEFAULT} = 1 if $CPAN::Config->{use_prompt_default}; local $ENV{NONINTERACTIVE_TESTING} = 1 if $CPAN::Config->{use_prompt_default}; diff --git a/cpan/Scalar-List-Utils/t/tainted.t b/cpan/Scalar-List-Utils/t/tainted.t index 9e0df5e..fb83c86 100644 --- a/cpan/Scalar-List-Utils/t/tainted.t +++ b/cpan/Scalar-List-Utils/t/tainted.t @@ -13,15 +13,10 @@ my $var = 2; ok( !tainted($var), 'known variable'); -my $key = (grep { !/^PERL/ } keys %ENV)[0]; +ok( tainted($^X), 'interpreter variable'); -SKIP: { # Skip these to get blead to pass, but the skip expires soon -skip 'is randomly failing', 2 unless $] gt 5.025009; -ok( tainted($ENV{$key}), 'environment variable'); - -$var = $ENV{$key}; -ok( tainted($var), 'copy of environment variable'); -} +$var = $^X; +ok( tainted($var), 'copy of interpreter variable'); { package Tainted; diff --git a/dist/Module-CoreList/Changes b/dist/Module-CoreList/Changes index 18726a3..b67728d 100644 --- a/dist/Module-CoreList/Changes +++ b/dist/Module-CoreList/Changes @@ -1,3 +1,6 @@ +5.20170220 + - Updated for v5.25.10 + 5.20170120 - Updated for v5.25.9 diff --git a/dist/Module-CoreList/lib/Module/CoreList.pm b/dist/Module-CoreList/lib/Module/CoreList.pm index 2d8b2a4..b6fcb30 100644 --- a/dist/Module-CoreList/lib/Module/CoreList.pm +++ b/dist/Module-CoreList/lib/Module/CoreList.pm @@ -3,7 +3,7 @@ use strict; use vars qw/$VERSION %released %version %families %upstream %bug_tracker %deprecated %delta/; use version; -$VERSION = '5.20170120'; +$VERSION = '5.20170220'; sub _undelta { my ($delta) = @_; @@ -317,6 +317,7 @@ sub changes_between { 5.022003 => '2017-01-14', 5.024001 => '2017-01-14', 5.025009 => '2017-01-20', + 5.025010 => '2017-02-20', ); for my $version ( sort { $a <=> $b } keys %released ) { @@ -13925,6 +13926,28 @@ for my $version ( sort { $a <=> $b } keys %released ) { removed => { } }, + 5.025010 => { + delta_from => 5.025009, + changed => { + 'B' => '1.68', + 'B::Op_private' => '5.025010', + 'CPAN' => '2.17', + 'CPAN::Distribution' => '2.17', + 'Config' => '5.02501', + 'Getopt::Std' => '1.12', + 'Module::CoreList' => '5.20170220', + 'Module::CoreList::TieHashDelta'=> '5.20170220', + 'Module::CoreList::Utils'=> '5.20170220', + 'PerlIO' => '1.10', + 'Storable' => '2.62', + 'Thread::Queue' => '3.12', + 'feature' => '1.47', + 'open' => '1.11', + 'threads' => '2.13', + }, + removed => { + } + }, ); sub is_core @@ -14656,6 +14679,13 @@ sub is_core removed => { } }, + 5.025010 => { + delta_from => 5.025009, + changed => { + }, + removed => { + } + }, ); %deprecated = _undelta(\%deprecated); diff --git a/dist/Module-CoreList/lib/Module/CoreList/TieHashDelta.pm b/dist/Module-CoreList/lib/Module/CoreList/TieHashDelta.pm index b088512..68cd8d6 100644 --- a/dist/Module-CoreList/lib/Module/CoreList/TieHashDelta.pm +++ b/dist/Module-CoreList/lib/Module/CoreList/TieHashDelta.pm @@ -3,7 +3,7 @@ package Module::CoreList::TieHashDelta; use strict; use vars qw($VERSION); -$VERSION = '5.20170120'; +$VERSION = '5.20170220'; sub TIEHASH { my ($class, $changed, $removed, $parent) = @_; diff --git a/dist/Module-CoreList/lib/Module/CoreList/Utils.pm b/dist/Module-CoreList/lib/Module/CoreList/Utils.pm index 4cfbd5f..c9dc903 100644 --- a/dist/Module-CoreList/lib/Module/CoreList/Utils.pm +++ b/dist/Module-CoreList/lib/Module/CoreList/Utils.pm @@ -5,7 +5,7 @@ use warnings; use vars qw[$VERSION %utilities]; use Module::CoreList; -$VERSION = '5.20170120'; +$VERSION = '5.20170220'; sub utilities { my $perl = shift; @@ -1235,6 +1235,13 @@ my %delta = ( 'pstruct' => 1, } }, + 5.025010 => { + delta_from => 5.025009, + changed => { + }, + removed => { + } + }, ); %utilities = Module::CoreList::_undelta(\%delta); diff --git a/dist/Net-Ping/t/010_pingecho.t b/dist/Net-Ping/t/010_pingecho.t index 5e05cde..6516d16 100644 --- a/dist/Net-Ping/t/010_pingecho.t +++ b/dist/Net-Ping/t/010_pingecho.t @@ -11,5 +11,9 @@ BEGIN { use Test::More tests => 2; BEGIN {use_ok('Net::Ping')}; -my $result = pingecho("127.0.0.1"); -is($result, 1, "pingecho works"); +TODO: { + local $TODO = "Not working on os390 smoker; may be a prermissions problem" + if $^O eq 'os390'; + my $result = pingecho("127.0.0.1"); + is($result, 1, "pingecho works"); +} diff --git a/dist/Storable/Storable.pm b/dist/Storable/Storable.pm index 397d584..d8fd740 100644 --- a/dist/Storable/Storable.pm +++ b/dist/Storable/Storable.pm @@ -22,7 +22,7 @@ package Storable; @ISA = qw(Exporter); use vars qw($canonical $forgive_me $VERSION); -$VERSION = '2.61'; +$VERSION = '2.62'; BEGIN { if (eval { diff --git a/dist/Storable/Storable.xs b/dist/Storable/Storable.xs index a72d84c..9ba48be 100644 --- a/dist/Storable/Storable.xs +++ b/dist/Storable/Storable.xs @@ -4048,7 +4048,7 @@ static SV *retrieve_idx_blessed(pTHX_ stcxt_t *cxt, const char *cname) */ static SV *retrieve_blessed(pTHX_ stcxt_t *cxt, const char *cname) { - I32 len; + U32 len; SV *sv; char buf[LG_BLESS + 1]; /* Avoid malloc() if possible */ char *classname = buf; @@ -4069,6 +4069,9 @@ static SV *retrieve_blessed(pTHX_ stcxt_t *cxt, const char *cname) if (len & 0x80) { RLEN(len); TRACEME(("** allocating %d bytes for class name", len+1)); + if (len > I32_MAX) { + CROAK(("Corrupted classname length")); + } New(10003, classname, len+1, char); malloced_classname = classname; } @@ -4119,7 +4122,7 @@ static SV *retrieve_blessed(pTHX_ stcxt_t *cxt, const char *cname) */ static SV *retrieve_hook(pTHX_ stcxt_t *cxt, const char *cname) { - I32 len; + U32 len; char buf[LG_BLESS + 1]; /* Avoid malloc() if possible */ char *classname = buf; unsigned int flags; @@ -4253,6 +4256,10 @@ static SV *retrieve_hook(pTHX_ stcxt_t *cxt, const char *cname) else GETMARK(len); + if (len > I32_MAX) { + CROAK(("Corrupted classname length")); + } + if (len > LG_BLESS) { TRACEME(("** allocating %d bytes for class name", len+1)); New(10003, classname, len+1, char); @@ -4274,6 +4281,11 @@ static SV *retrieve_hook(pTHX_ stcxt_t *cxt, const char *cname) TRACEME(("class name: %s", classname)); + if (!(flags & SHF_IDX_CLASSNAME) && classname != buf) { + /* some execution paths can throw an exception */ + SAVEFREEPV(classname); + } + /* * Decode user-frozen string length and read it in an SV. * @@ -4393,8 +4405,6 @@ static SV *retrieve_hook(pTHX_ stcxt_t *cxt, const char *cname) SEEN0_NN(sv, 0); SvRV_set(attached, NULL); SvREFCNT_dec(attached); - if (!(flags & SHF_IDX_CLASSNAME) && classname != buf) - Safefree(classname); return sv; } CROAK(("STORABLE_attach did not return a %s object", classname)); @@ -4475,8 +4485,6 @@ static SV *retrieve_hook(pTHX_ stcxt_t *cxt, const char *cname) SvREFCNT_dec(frozen); av_undef(av); sv_free((SV *) av); - if (!(flags & SHF_IDX_CLASSNAME) && classname != buf) - Safefree(classname); /* * If we had an type, then the object was not as simple, and diff --git a/dist/Storable/t/store.t b/dist/Storable/t/store.t index 3a4b9dc..b25dbd2 100644 --- a/dist/Storable/t/store.t +++ b/dist/Storable/t/store.t @@ -19,7 +19,7 @@ sub BEGIN { use Storable qw(store retrieve store_fd nstore_fd fd_retrieve); -use Test::More tests => 24; +use Test::More tests => 25; $a = 'toto'; $b = \$a; @@ -101,5 +101,15 @@ isnt($@, ''); } } +{ + + my $frozen = + "\x70\x73\x74\x30\x04\x0a\x08\x31\x32\x33\x34\x35\x36\x37\x38\x04\x08\x08\x08\x03\xff\x00\x00\x00\x19\x08\xff\x00\x00\x00\x08\x08\xf9\x16\x16\x13\x16\x10\x10\x10\xff\x15\x16\x16\x16\x1e\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x13\xf0\x16\x16\x16\xfe\x16\x41\x41\x41\x41\xe8\x03\x41\x41\x41\x41\x41\x41\x41\x41\x51\x41\xa9\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xb8\xac\xac\xac\xac\xac\xac\xac\xac\x9a\xac\xac\xac\xac\xac\xac\xac\xac\xac\x93\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\x00\x64\xac\xa8\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\xac\x2c\xac\x41\x41\x41\x41\x41\x41\x41\x41\x41\x00\x80\x41\x80\x41\x41\x41\x41\x41\x41\x51\x41\xac\xac\xac"; + open my $fh, '<', \$frozen; + eval { Storable::fd_retrieve($fh); }; + pass('RT 130635: no stack smashing error when retrieving hook'); + +} + close OUT or die "Could not close: $!"; END { 1 while unlink 'store' } diff --git a/dist/Thread-Queue/lib/Thread/Queue.pm b/dist/Thread-Queue/lib/Thread/Queue.pm index 9f896b7..c0d2180 100644 --- a/dist/Thread-Queue/lib/Thread/Queue.pm +++ b/dist/Thread-Queue/lib/Thread/Queue.pm @@ -3,7 +3,7 @@ package Thread::Queue; use strict; use warnings; -our $VERSION = '3.11'; +our $VERSION = '3.12'; $VERSION = eval $VERSION; use threads::shared 1.21; @@ -65,8 +65,8 @@ sub end lock(%$self); # No more data is coming $$self{'ENDED'} = 1; - # Try to release at least one blocked thread - cond_signal(%$self); + + cond_signal(%$self); # Unblock possibly waiting threads } # Return 1 or more items from the head of a queue, blocking if needed @@ -80,17 +80,21 @@ sub dequeue # Wait for requisite number of items cond_wait(%$self) while ((@$queue < $count) && ! $$self{'ENDED'}); - cond_signal(%$self) if ((@$queue >= $count) || $$self{'ENDED'}); # If no longer blocking, try getting whatever is left on the queue return $self->dequeue_nb($count) if ($$self{'ENDED'}); # Return single item - return shift(@$queue) if ($count == 1); + if ($count == 1) { + my $item = shift(@$queue); + cond_signal(%$self); # Unblock possibly waiting threads + return $item; + } # Return multiple items my @items; push(@items, shift(@$queue)) for (1..$count); + cond_signal(%$self); # Unblock possibly waiting threads return @items; } @@ -104,7 +108,11 @@ sub dequeue_nb my $count = @_ ? $self->_validate_count(shift) : 1; # Return single item - return shift(@$queue) if ($count == 1); + if ($count == 1) { + my $item = shift(@$queue); + cond_signal(%$self); # Unblock possibly waiting threads + return $item; + } # Return multiple items my @items; @@ -112,6 +120,7 @@ sub dequeue_nb last if (! @$queue); push(@items, shift(@$queue)); } + cond_signal(%$self); # Unblock possibly waiting threads return @items; } @@ -135,7 +144,6 @@ sub dequeue_timed while ((@$queue < $count) && ! $$self{'ENDED'}) { last if (! cond_timedwait(%$self, $timeout)); } - cond_signal(%$self) if ((@$queue >= $count) || $$self{'ENDED'}); # Get whatever we need off the queue if available return $self->dequeue_nb($count); @@ -187,8 +195,7 @@ sub insert # Add previous items back onto the queue push(@$queue, @tmp); - # Soup's up - cond_signal(%$self); + cond_signal(%$self); # Unblock possibly waiting threads } # Remove items from anywhere in a queue @@ -206,7 +213,7 @@ sub extract $index += @$queue; if ($index < 0) { $count += $index; - return if ($count <= 0); # Beyond the head of the queue + return if ($count <= 0); # Beyond the head of the queue return $self->dequeue_nb($count); # Extract from the head } } @@ -224,6 +231,8 @@ sub extract # Add back any removed items push(@$queue, @tmp); + cond_signal(%$self); # Unblock possibly waiting threads + # Return single item return $items[0] if ($count == 1); @@ -263,14 +272,19 @@ sub _validate_count if (! defined($count) || ! looks_like_number($count) || (int($count) != $count) || - ($count < 1)) + ($count < 1) || + ($$self{'LIMIT'} && $count > $$self{'LIMIT'})) { require Carp; my ($method) = (caller(1))[3]; my $class_name = ref($self); $method =~ s/$class_name\:://; $count = 'undef' if (! defined($count)); - Carp::croak("Invalid 'count' argument ($count) to '$method' method"); + if ($$self{'LIMIT'} && $count > $$self{'LIMIT'}) { + Carp::croak("'count' argument ($count) to '$method' method exceeds queue size limit ($$self{'LIMIT'})"); + } else { + Carp::croak("Invalid 'count' argument ($count) to '$method' method"); + } } return $count; @@ -304,7 +318,7 @@ Thread::Queue - Thread-safe queues =head1 VERSION -This document describes Thread::Queue version 3.11 +This document describes Thread::Queue version 3.12 =head1 SYNOPSIS @@ -494,6 +508,9 @@ C does not prevent enqueuing items beyond that count: # 'undef') $q->limit = 0; # Queue size is now unlimited +Calling any of the dequeue methods with C greater than a queue's +C will generate an error. + =item ->end() Declares that no more items will be added to the queue. diff --git a/dist/Thread-Queue/t/01_basic.t b/dist/Thread-Queue/t/01_basic.t index 4ec5195..2983f0b 100644 --- a/dist/Thread-Queue/t/01_basic.t +++ b/dist/Thread-Queue/t/01_basic.t @@ -13,7 +13,7 @@ use threads; use Thread::Queue; if ($] == 5.008) { - require './t/test.pl'; # Test::More work-alike for Perl 5.8.0 + require 't/test.pl'; # Test::More work-alike for Perl 5.8.0 } else { require Test::More; } diff --git a/dist/Thread-Queue/t/02_refs.t b/dist/Thread-Queue/t/02_refs.t index fdf8f6b..0cebdc1 100644 --- a/dist/Thread-Queue/t/02_refs.t +++ b/dist/Thread-Queue/t/02_refs.t @@ -14,7 +14,7 @@ use threads::shared; use Thread::Queue; if ($] == 5.008) { - require './t/test.pl'; # Test::More work-alike for Perl 5.8.0 + require 't/test.pl'; # Test::More work-alike for Perl 5.8.0 } else { require Test::More; } diff --git a/dist/Thread-Queue/t/03_peek.t b/dist/Thread-Queue/t/03_peek.t index 29ef75e..d543b59 100644 --- a/dist/Thread-Queue/t/03_peek.t +++ b/dist/Thread-Queue/t/03_peek.t @@ -13,7 +13,7 @@ use threads; use Thread::Queue; if ($] == 5.008) { - require './t/test.pl'; # Test::More work-alike for Perl 5.8.0 + require 't/test.pl'; # Test::More work-alike for Perl 5.8.0 } else { require Test::More; } diff --git a/dist/Thread-Queue/t/05_extract.t b/dist/Thread-Queue/t/05_extract.t index d8cb417..de0e78b 100644 --- a/dist/Thread-Queue/t/05_extract.t +++ b/dist/Thread-Queue/t/05_extract.t @@ -13,7 +13,7 @@ use threads; use Thread::Queue; if ($] == 5.008) { - require './t/test.pl'; # Test::More work-alike for Perl 5.8.0 + require 't/test.pl'; # Test::More work-alike for Perl 5.8.0 } else { require Test::More; } diff --git a/dist/Thread-Queue/t/06_insert.t b/dist/Thread-Queue/t/06_insert.t index 93617e1..4f9d1df 100644 --- a/dist/Thread-Queue/t/06_insert.t +++ b/dist/Thread-Queue/t/06_insert.t @@ -13,7 +13,7 @@ use threads; use Thread::Queue; if ($] == 5.008) { - require './t/test.pl'; # Test::More work-alike for Perl 5.8.0 + require 't/test.pl'; # Test::More work-alike for Perl 5.8.0 } else { require Test::More; } diff --git a/dist/Thread-Queue/t/07_lock.t b/dist/Thread-Queue/t/07_lock.t index 6337221..b20e060 100644 --- a/dist/Thread-Queue/t/07_lock.t +++ b/dist/Thread-Queue/t/07_lock.t @@ -14,7 +14,7 @@ use Thread::Queue; use Thread::Semaphore; if ($] == 5.008) { - require './t/test.pl'; # Test::More work-alike for Perl 5.8.0 + require 't/test.pl'; # Test::More work-alike for Perl 5.8.0 } else { require Test::More; } diff --git a/dist/Thread-Queue/t/10_timed.t b/dist/Thread-Queue/t/10_timed.t index da8b03a..8404720 100644 --- a/dist/Thread-Queue/t/10_timed.t +++ b/dist/Thread-Queue/t/10_timed.t @@ -13,7 +13,7 @@ use threads; use Thread::Queue; if ($] == 5.008) { - require './t/test.pl'; # Test::More work-alike for Perl 5.8.0 + require 't/test.pl'; # Test::More work-alike for Perl 5.8.0 } else { require Test::More; } diff --git a/dist/Thread-Queue/t/11_limit.t b/dist/Thread-Queue/t/11_limit.t index 1bd88b3..12f351b 100644 --- a/dist/Thread-Queue/t/11_limit.t +++ b/dist/Thread-Queue/t/11_limit.t @@ -19,7 +19,7 @@ use Thread::Queue; use Test::More; -plan tests => 8; +plan tests => 13; my $q = Thread::Queue->new(); my $rpt = Thread::Queue->new(); @@ -82,12 +82,12 @@ $rpt->enqueue($q->pending); # q = (4, 5, 'foo'); r = (4, 3, 4, 3) # Read all items from queue -my @item = $q->dequeue(3); -is_deeply(\@item, [4, 5, 'foo'], 'Dequeued 3 items'); +my @items = $q->dequeue(3); +is_deeply(\@items, [4, 5, 'foo'], 'Dequeued 3 items'); # Thread is now unblocked -@item = $q->dequeue(2); -is_deeply(\@item, [6, 7], 'Dequeued 2 items'); +@items = $q->dequeue(2); +is_deeply(\@items, [6, 7], 'Dequeued 2 items'); # Thread is now unblocked # Handshake with thread @@ -96,6 +96,37 @@ $rpt->enqueue('go'); # (7) - Done $th->join; +# It's an error to call dequeue methods with COUNT > LIMIT +eval { $q->dequeue(5); }; +like($@, qr/exceeds queue size limit/, $@); + +# Bug #120157 +# Fix deadlock from combination of dequeue_nb, enqueue and queue size limit + +# (1) Fill queue +$q->enqueue(1..3); +is($q->pending, 3, 'Queue loaded'); + +# (2) Thread will block trying to add to full queue +$th = threads->create( sub { + $q->enqueue(99); + return('OK'); +}); +threads->yield(); + +# (3) Dequeue an item so that thread can unblock +is($q->dequeue_nb(), 1, 'Dequeued item'); + +# (4) Thread unblocks +is($th->join(), 'OK', 'Thread exited'); + +# (5) Fetch queue to show thread's item was enqueued +@items = (); +while (my $item = $q->dequeue_nb()) { + push(@items, $item); +} +is_deeply(\@items, [2,3,99], 'Dequeued remaining'); + exit(0); # EOF diff --git a/dist/threads/lib/threads.pm b/dist/threads/lib/threads.pm index 14bf920..b2bd872 100644 --- a/dist/threads/lib/threads.pm +++ b/dist/threads/lib/threads.pm @@ -5,7 +5,7 @@ use 5.008; use strict; use warnings; -our $VERSION = '2.12'; +our $VERSION = '2.13'; my $XS_VERSION = $VERSION; $VERSION = eval $VERSION; diff --git a/dist/threads/threads.xs b/dist/threads/threads.xs index 579fff3..8382765 100644 --- a/dist/threads/threads.xs +++ b/dist/threads/threads.xs @@ -1022,7 +1022,7 @@ S_ithread_create( #endif } #if defined(__clang__) || defined(__clang) -CLANG_DIAG_RESTORE; +CLANG_DIAG_RESTORE #endif #endif /* USE_ITHREADS */ diff --git a/doio.c b/doio.c index 8ca9c4b..becb19b 100644 --- a/doio.c +++ b/doio.c @@ -761,7 +761,11 @@ S_openn_cleanup(pTHX_ GV *gv, IO *io, PerlIO *fp, char *mode, const char *oname, #if defined(HAS_FCNTL) && defined(F_SETFD) /* The dup trick has lost close-on-exec on ofd, * and possibly any other flags, so restore them. */ - fcntl(ofd,F_SETFD, fd_flags); + if (fcntl(ofd,F_SETFD, fd_flags) < 0) { + if (dupfd >= 0) + PerlLIO_close(dupfd); + goto say_false; + } #endif PerlLIO_close(dupfd); } @@ -2113,7 +2117,7 @@ Perl_cando(pTHX_ Mode_t mode, bool effective, const Stat_t *statbufp) /* Atari stat() does pretty much the same thing. we set x_bit_set_in_stat * too so it will actually look into the files for magic numbers */ - return (mode & statbufp->st_mode) ? TRUE : FALSE; + return cBOOL(mode & statbufp->st_mode); #else /* ! DOSISH */ # ifdef __CYGWIN__ diff --git a/doop.c b/doop.c index e4b2cd8..b5c1003 100644 --- a/doop.c +++ b/doop.c @@ -1065,16 +1065,16 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ } if (left_utf || right_utf) { - UV duc, luc, ruc; char *dcorig = dc; char *dcsave = NULL; STRLEN lulen = leftlen; STRLEN rulen = rightlen; - STRLEN ulen; switch (optype) { case OP_BIT_AND: while (lulen && rulen) { + UV duc, luc, ruc; + STRLEN ulen; luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); lc += ulen; lulen -= ulen; @@ -1097,6 +1097,8 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) break; case OP_BIT_XOR: while (lulen && rulen) { + UV duc, luc, ruc; + STRLEN ulen; luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); lc += ulen; lulen -= ulen; @@ -1114,6 +1116,8 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) goto mop_up_utf; case OP_BIT_OR: while (lulen && rulen) { + UV duc, luc, ruc; + STRLEN ulen; luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); lc += ulen; lulen -= ulen; diff --git a/dquote.c b/dquote.c index ef03046..e02308e 100644 --- a/dquote.c +++ b/dquote.c @@ -46,10 +46,10 @@ Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning) clearer[i++] = result; clearer[i++] = '\0'; - Perl_ck_warner_d(aTHX_ packWARN2(WARN_SYNTAX,WARN_DEPRECATED), - "\"\\c%c\" is more clearly written simply as \"%s\". " - "This will be a fatal error in Perl 5.28", - source, clearer); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "\"\\c%c\" is more clearly written simply as \"%s\"", + source, + clearer); } return result; diff --git a/dump.c b/dump.c index 3915af1..52b52ca 100644 --- a/dump.c +++ b/dump.c @@ -205,16 +205,16 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str, chsize = 1; break; default: - if ( (flags & PERL_PV_ESCAPE_DWIM) && c != '\0' ) { + if ( (flags & PERL_PV_ESCAPE_DWIM) && c != '\0' ) { chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, isuni ? "%cx{%02" UVxf "}" : "%cx%02" UVxf, esc, u); - } - else if ( (pv+readsize < end) && isDIGIT((U8)*(pv+readsize)) ) - chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, + } + else if ((pv+readsize < end) && isDIGIT((U8)*(pv+readsize))) + chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, "%c%03o", esc, c); - else - chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, + else + chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, "%c%o", esc, c); } } else { @@ -523,6 +523,86 @@ Perl_dump_vindent(pTHX_ I32 level, PerlIO *file, const char* pat, va_list *args) PerlIO_vprintf(file, pat, *args); } + +/* Like Perl_dump_indent(), but specifically for ops: adds a vertical bar + * for each indent level as appropriate. + * + * bar contains bits indicating which indent columns should have a + * vertical bar displayed. Bit 0 is the RH-most column. If there are more + * levels than bits in bar, then the first few indents are displayed + * without a bar. + * + * The start of a new op is signalled by passing a value for level which + * has been negated and offset by 1 (so that level 0 is passed as -1 and + * can thus be distinguished from -0); in this case, emit a suitably + * indented blank line, then on the next line, display the op's sequence + * number, and make the final indent an '+----'. + * + * e.g. + * + * | FOO # level = 1, bar = 0b1 + * | | # level =-2-1, bar = 0b11 + * 1234 | +---BAR + * | BAZ # level = 2, bar = 0b10 + */ + +static void +S_opdump_indent(pTHX_ const OP *o, I32 level, UV bar, PerlIO *file, + const char* pat, ...) +{ + va_list args; + I32 i; + bool newop = (level < 0); + + va_start(args, pat); + + /* start displaying a new op? */ + if (newop) { + UV seq = sequence_num(o); + + level = -level - 1; + + /* output preceding blank line */ + PerlIO_puts(file, " "); + for (i = level-1; i >= 0; i--) + PerlIO_puts(file, i == 0 || (bar & (1 << i)) ? "| " : " "); + PerlIO_puts(file, "\n"); + + /* output sequence number */ + if (seq) + PerlIO_printf(file, "%-4" UVuf " ", seq); + else + PerlIO_puts(file, "???? "); + + } + else + PerlIO_printf(file, " "); + + for (i = level-1; i >= 0; i--) + PerlIO_puts(file, + (i == 0 && newop) ? "+--" + : (bar & (1 << i)) ? "| " + : " "); + PerlIO_vprintf(file, pat, args); + va_end(args); +} + + +/* display a link field (e.g. op_next) in the format + * ====> sequence_number [opname 0x123456] + */ + +static void +S_opdump_link(pTHX_ const OP *o, PerlIO *file) +{ + PerlIO_puts(file, " ===> "); + if (o) + PerlIO_printf(file, "%" UVuf " [%s 0x%" UVxf "]\n", + sequence_num(o), OP_NAME(o), PTR2UV(o)); + else + PerlIO_puts(file, "[0x0]\n"); +} + /* =for apidoc dump_all @@ -604,27 +684,33 @@ Perl_dump_sub(pTHX_ const GV *gv) void Perl_dump_sub_perl(pTHX_ const GV *gv, bool justperl) { - STRLEN len; - SV * const sv = newSVpvs_flags("", SVs_TEMP); - SV *tmpsv; - const char * name; + CV *cv; PERL_ARGS_ASSERT_DUMP_SUB_PERL; - if (justperl && (CvISXSUB(GvCV(gv)) || !CvROOT(GvCV(gv)))) + cv = isGV_with_GP(gv) ? GvCV(gv) : + (assert(SvROK((SV*)gv)), (CV*)SvRV((SV*)gv)); + if (justperl && (CvISXSUB(cv) || !CvROOT(cv))) return; - tmpsv = newSVpvs_flags("", SVs_TEMP); - gv_fullname3(sv, gv, NULL); - name = SvPV_const(sv, len); - Perl_dump_indent(aTHX_ 0, Perl_debug_log, "\nSUB %s = ", - generic_pv_escape(tmpsv, name, len, SvUTF8(sv))); - if (CvISXSUB(GvCV(gv))) + if (isGV_with_GP(gv)) { + SV * const namesv = newSVpvs_flags("", SVs_TEMP); + SV *escsv = newSVpvs_flags("", SVs_TEMP); + const char *namepv; + STRLEN namelen; + gv_fullname3(namesv, gv, NULL); + namepv = SvPV_const(namesv, namelen); + Perl_dump_indent(aTHX_ 0, Perl_debug_log, "\nSUB %s = ", + generic_pv_escape(escsv, namepv, namelen, SvUTF8(namesv))); + } else { + Perl_dump_indent(aTHX_ 0, Perl_debug_log, "\nSUB = "); + } + if (CvISXSUB(cv)) Perl_dump_indent(aTHX_ 0, Perl_debug_log, "(xsub 0x%" UVxf " %d)\n", - PTR2UV(CvXSUB(GvCV(gv))), - (int)CvXSUBANY(GvCV(gv)).any_i32); - else if (CvROOT(GvCV(gv))) - op_dump(CvROOT(GvCV(gv))); + PTR2UV(CvXSUB(cv)), + (int)CvXSUBANY(cv).any_i32); + else if (CvROOT(cv)) + op_dump(CvROOT(cv)); else Perl_dump_indent(aTHX_ 0, Perl_debug_log, "\n"); } @@ -650,51 +736,103 @@ Perl_dump_eval(pTHX) op_dump(PL_eval_root); } -void -Perl_do_pmop_dump(pTHX_ I32 level, PerlIO *file, const PMOP *pm) + +/* returns a temp SV displaying the name of a GV. Handles the case where + * a GV is in fact a ref to a CV */ + +static SV * +S_gv_display(pTHX_ GV *gv) { - char ch; + SV * const name = newSVpvs_flags("", SVs_TEMP); + if (gv) { + SV * const raw = newSVpvs_flags("", SVs_TEMP); + STRLEN len; + const char * rawpv; + + if (isGV_with_GP(gv)) + gv_fullname3(raw, gv, NULL); + else { + assert(SvROK(gv)); + assert(SvTYPE(SvRV(gv)) == SVt_PVCV); + Perl_sv_catpvf(aTHX_ raw, "cv ref: %s", + SvPV_nolen_const(cv_name((CV *)SvRV(gv), name, 0))); + } + rawpv = SvPV_const(raw, len); + generic_pv_escape(name, rawpv, len, SvUTF8(raw)); + } + else + sv_catpvs(name, "(NULL)"); - PERL_ARGS_ASSERT_DO_PMOP_DUMP; + return name; +} + + + +/* forward decl */ +static void +S_do_op_dump_bar(pTHX_ I32 level, UV bar, PerlIO *file, const OP *o); + + +static void +S_do_pmop_dump_bar(pTHX_ I32 level, UV bar, PerlIO *file, const PMOP *pm) +{ + UV kidbar; if (!pm) return; - if (pm->op_pmflags & PMf_ONCE) - ch = '?'; - else - ch = '/'; - if (PM_GETRE(pm)) - Perl_dump_indent(aTHX_ level, file, "PMf_PRE %c%.*s%c\n", + + kidbar = ((bar << 1) | cBOOL(pm->op_flags & OPf_KIDS)) << 1; + + if (PM_GETRE(pm)) { + char ch = (pm->op_pmflags & PMf_ONCE) ? '?' : '/'; + S_opdump_indent(aTHX_ (OP*)pm, level, bar, file, "PMf_PRE %c%.*s%c\n", ch,(int)RX_PRELEN(PM_GETRE(pm)), RX_PRECOMP(PM_GETRE(pm)), ch); + } else - Perl_dump_indent(aTHX_ level, file, "PMf_PRE (RUNTIME)\n"); + S_opdump_indent(aTHX_ (OP*)pm, level, bar, file, "PMf_PRE (RUNTIME)\n"); + + if (pm->op_pmflags || (PM_GETRE(pm) && RX_CHECK_SUBSTR(PM_GETRE(pm)))) { + SV * const tmpsv = pm_description(pm); + S_opdump_indent(aTHX_ (OP*)pm, level, bar, file, "PMFLAGS = (%s)\n", + SvCUR(tmpsv) ? SvPVX_const(tmpsv) + 1 : ""); + SvREFCNT_dec_NN(tmpsv); + } if (pm->op_type == OP_SPLIT) - Perl_dump_indent(aTHX_ level, file, "TARGOFF/GV = 0x%" UVxf "\n", - PTR2UV(pm->op_pmreplrootu.op_pmtargetgv)); + S_opdump_indent(aTHX_ (OP*)pm, level, bar, file, + "TARGOFF/GV = 0x%" UVxf "\n", + PTR2UV(pm->op_pmreplrootu.op_pmtargetgv)); else { if (pm->op_pmreplrootu.op_pmreplroot) { - Perl_dump_indent(aTHX_ level, file, "PMf_REPL = "); - op_dump(pm->op_pmreplrootu.op_pmreplroot); + S_opdump_indent(aTHX_ (OP*)pm, level, bar, file, "PMf_REPL =\n"); + S_do_op_dump_bar(aTHX_ level + 2, + (kidbar|cBOOL(OpHAS_SIBLING(pm->op_pmreplrootu.op_pmreplroot))), + file, pm->op_pmreplrootu.op_pmreplroot); } } if (pm->op_code_list) { if (pm->op_pmflags & PMf_CODELIST_PRIVATE) { - Perl_dump_indent(aTHX_ level, file, "CODE_LIST =\n"); - do_op_dump(level, file, pm->op_code_list); + S_opdump_indent(aTHX_ (OP*)pm, level, bar, file, "CODE_LIST =\n"); + S_do_op_dump_bar(aTHX_ level + 2, + (kidbar | cBOOL(OpHAS_SIBLING(pm->op_code_list))), + file, pm->op_code_list); } else - Perl_dump_indent(aTHX_ level, file, "CODE_LIST = 0x%" UVxf "\n", - PTR2UV(pm->op_code_list)); - } - if (pm->op_pmflags || (PM_GETRE(pm) && RX_CHECK_SUBSTR(PM_GETRE(pm)))) { - SV * const tmpsv = pm_description(pm); - Perl_dump_indent(aTHX_ level, file, "PMFLAGS = (%s)\n", SvCUR(tmpsv) ? SvPVX_const(tmpsv) + 1 : ""); - SvREFCNT_dec_NN(tmpsv); + S_opdump_indent(aTHX_ (OP*)pm, level, bar, file, + "CODE_LIST = 0x%" UVxf "\n", PTR2UV(pm->op_code_list)); } } + +void +Perl_do_pmop_dump(pTHX_ I32 level, PerlIO *file, const PMOP *pm) +{ + PERL_ARGS_ASSERT_DO_PMOP_DUMP; + S_do_pmop_dump_bar(aTHX_ level, 0, file, pm); +} + + const struct flag_to_name pmflags_flags_names[] = { {PMf_CONST, ",CONST"}, {PMf_KEEP, ",KEEP"}, @@ -791,41 +929,61 @@ const struct flag_to_name op_flags_names[] = { }; -void -Perl_do_op_dump(pTHX_ I32 level, PerlIO *file, const OP *o) +/* indexed by enum OPclass */ +const char * op_class_names[] = { + "NULL", + "OP", + "UNOP", + "BINOP", + "LOGOP", + "LISTOP", + "PMOP", + "SVOP", + "PADOP", + "PVOP", + "LOOP", + "COP", + "METHOP", + "UNOP_AUX", +}; + + +/* dump an op and any children. level indicates the initial indent. + * The bits of bar indicate which indents should receive a vertical bar. + * For example if level == 5 and bar == 0b01101, then the indent prefix + * emitted will be (not including the <>'s): + * + * < | | | > + * 55554444333322221111 + * + * For heavily nested output, the level may exceed the number of bits + * in bar; in this case the first few columns in the output will simply + * not have a bar, which is harmless. + */ + +static void +S_do_op_dump_bar(pTHX_ I32 level, UV bar, PerlIO *file, const OP *o) { - UV seq; const OPCODE optype = o->op_type; PERL_ARGS_ASSERT_DO_OP_DUMP; - Perl_dump_indent(aTHX_ level, file, "{\n"); - level++; - seq = sequence_num(o); - if (seq) - PerlIO_printf(file, "%-4" UVuf, seq); - else - PerlIO_printf(file, "????"); - PerlIO_printf(file, - "%*sTYPE = %s ===> ", - (int)(PL_dumpindent*level-4), "", OP_NAME(o)); - if (o->op_next) - PerlIO_printf(file, - o->op_type == OP_NULL ? "(%" UVuf ")\n" : "%" UVuf "\n", - sequence_num(o->op_next)); - else - PerlIO_printf(file, "NULL\n"); - if (o->op_targ) { - if (optype == OP_NULL) { - Perl_dump_indent(aTHX_ level, file, " (was %s)\n", PL_op_name[o->op_targ]); - } - else - Perl_dump_indent(aTHX_ level, file, "TARG = %ld\n", (long)o->op_targ); - } -#ifdef DUMPADDR - Perl_dump_indent(aTHX_ level, file, "ADDR = 0x%" UVxf " => 0x%" UVxf "\n", - (UV)o, (UV)o->op_next); -#endif + /* print op header line */ + + S_opdump_indent(aTHX_ o, -level-1, bar, file, "%s", OP_NAME(o)); + + if (optype == OP_NULL && o->op_targ) + PerlIO_printf(file, " (ex-%s)",PL_op_name[o->op_targ]); + + PerlIO_printf(file, " %s(0x%" UVxf ")", + op_class_names[op_class(o)], PTR2UV(o)); + S_opdump_link(aTHX_ o->op_next, file); + + /* print op common fields */ + + if (o->op_targ && optype != OP_NULL) + S_opdump_indent(aTHX_ o, level, bar, file, "TARG = %ld\n", + (long)o->op_targ); if (o->op_flags || o->op_slabbed || o->op_savefree || o->op_static) { SV * const tmpsv = newSVpvs(""); @@ -849,7 +1007,7 @@ Perl_do_op_dump(pTHX_ I32 level, PerlIO *file, const OP *o) if (o->op_static) sv_catpvs(tmpsv, ",STATIC"); if (o->op_folded) sv_catpvs(tmpsv, ",FOLDED"); if (o->op_moresib) sv_catpvs(tmpsv, ",MORESIB"); - Perl_dump_indent(aTHX_ level, file, "FLAGS = (%s)\n", + S_opdump_indent(aTHX_ o, level, bar, file, "FLAGS = (%s)\n", SvCUR(tmpsv) ? SvPVX_const(tmpsv) + 1 : ""); } @@ -933,10 +1091,11 @@ Perl_do_op_dump(pTHX_ I32 level, PerlIO *file, const OP *o) } } if (tmpsv && SvCUR(tmpsv)) { - Perl_dump_indent(aTHX_ level, file, "PRIVATE = (%s)\n", SvPVX_const(tmpsv) + 1); + S_opdump_indent(aTHX_ o, level, bar, file, "PRIVATE = (%s)\n", + SvPVX_const(tmpsv) + 1); } else - Perl_dump_indent(aTHX_ level, file, "PRIVATE = (0x%" UVxf ")\n", - (UV)oppriv); + S_opdump_indent(aTHX_ o, level, bar, file, + "PRIVATE = (0x%" UVxf ")\n", (UV)oppriv); } switch (optype) { @@ -944,22 +1103,12 @@ Perl_do_op_dump(pTHX_ I32 level, PerlIO *file, const OP *o) case OP_GVSV: case OP_GV: #ifdef USE_ITHREADS - Perl_dump_indent(aTHX_ level, file, "PADIX = %" IVdf "\n", (IV)cPADOPo->op_padix); + S_opdump_indent(aTHX_ o, level, bar, file, + "PADIX = %" IVdf "\n", (IV)cPADOPo->op_padix); #else - if ( ! (o->op_flags & OPf_SPECIAL)) { /* not lexical */ - if (cSVOPo->op_sv) { - STRLEN len; - const char * name; - SV * const tmpsv = newSVpvs_flags("", SVs_TEMP); - SV * const tmpsv2 = newSVpvs_flags("", SVs_TEMP); - gv_fullname3(tmpsv, MUTABLE_GV(cSVOPo->op_sv), NULL); - name = SvPV_const(tmpsv, len); - Perl_dump_indent(aTHX_ level, file, "GV = %s\n", - generic_pv_escape( tmpsv2, name, len, SvUTF8(tmpsv))); - } - else - Perl_dump_indent(aTHX_ level, file, "GV = NULL\n"); - } + S_opdump_indent(aTHX_ o, level, bar, file, + "GV = %" SVf " (0x%" UVxf ")\n", + SVfARG(S_gv_display(aTHX_ cGVOPo_gv)), PTR2UV(cGVOPo_gv)); #endif break; @@ -968,9 +1117,10 @@ Perl_do_op_dump(pTHX_ I32 level, PerlIO *file, const OP *o) UNOP_AUX_item *items = cUNOP_AUXo->op_aux; UV i, count = items[-1].uv; - Perl_dump_indent(aTHX_ level, file, "ARGS = \n"); + S_opdump_indent(aTHX_ o, level, bar, file, "ARGS = \n"); for (i=0; i < count; i++) - Perl_dump_indent(aTHX_ level+1, file, "%" UVuf " => 0x%" UVxf "\n", + S_opdump_indent(aTHX_ o, level+1, (bar << 1), file, + "%" UVuf " => 0x%" UVxf "\n", i, items[i].uv); break; } @@ -984,7 +1134,8 @@ Perl_do_op_dump(pTHX_ I32 level, PerlIO *file, const OP *o) #ifndef USE_ITHREADS /* with ITHREADS, consts are stored in the pad, and the right pad * may not be active here, so skip */ - Perl_dump_indent(aTHX_ level, file, "SV = %s\n", SvPEEK(cMETHOPx_meth(o))); + S_opdump_indent(aTHX_ o, level, bar, file, "SV = %s\n", + SvPEEK(cMETHOPx_meth(o))); #endif break; case OP_NULL: @@ -994,64 +1145,69 @@ Perl_do_op_dump(pTHX_ I32 level, PerlIO *file, const OP *o) case OP_NEXTSTATE: case OP_DBSTATE: if (CopLINE(cCOPo)) - Perl_dump_indent(aTHX_ level, file, "LINE = %" UVuf "\n", + S_opdump_indent(aTHX_ o, level, bar, file, "LINE = %" UVuf "\n", (UV)CopLINE(cCOPo)); - if (CopSTASHPV(cCOPo)) { - SV* tmpsv = newSVpvs_flags("", SVs_TEMP); - HV *stash = CopSTASH(cCOPo); - const char * const hvname = HvNAME_get(stash); - - Perl_dump_indent(aTHX_ level, file, "PACKAGE = \"%s\"\n", - generic_pv_escape(tmpsv, hvname, - HvNAMELEN(stash), HvNAMEUTF8(stash))); - } - if (CopLABEL(cCOPo)) { - SV* tmpsv = newSVpvs_flags("", SVs_TEMP); - STRLEN label_len; - U32 label_flags; - const char *label = CopLABEL_len_flags(cCOPo, - &label_len, &label_flags); - Perl_dump_indent(aTHX_ level, file, "LABEL = \"%s\"\n", - generic_pv_escape( tmpsv, label, label_len, - (label_flags & SVf_UTF8))); - } - Perl_dump_indent(aTHX_ level, file, "SEQ = %u\n", + + if (CopSTASHPV(cCOPo)) { + SV* tmpsv = newSVpvs_flags("", SVs_TEMP); + HV *stash = CopSTASH(cCOPo); + const char * const hvname = HvNAME_get(stash); + + S_opdump_indent(aTHX_ o, level, bar, file, "PACKAGE = \"%s\"\n", + generic_pv_escape(tmpsv, hvname, + HvNAMELEN(stash), HvNAMEUTF8(stash))); + } + + if (CopLABEL(cCOPo)) { + SV* tmpsv = newSVpvs_flags("", SVs_TEMP); + STRLEN label_len; + U32 label_flags; + const char *label = CopLABEL_len_flags(cCOPo, + &label_len, &label_flags); + S_opdump_indent(aTHX_ o, level, bar, file, "LABEL = \"%s\"\n", + generic_pv_escape( tmpsv, label, label_len, + (label_flags & SVf_UTF8))); + } + + S_opdump_indent(aTHX_ o, level, bar, file, "SEQ = %u\n", (unsigned int)cCOPo->cop_seq); break; + + case OP_ENTERITER: case OP_ENTERLOOP: - Perl_dump_indent(aTHX_ level, file, "REDO ===> "); - if (cLOOPo->op_redoop) - PerlIO_printf(file, "%" UVuf "\n", sequence_num(cLOOPo->op_redoop)); - else - PerlIO_printf(file, "DONE\n"); - Perl_dump_indent(aTHX_ level, file, "NEXT ===> "); - if (cLOOPo->op_nextop) - PerlIO_printf(file, "%" UVuf "\n", sequence_num(cLOOPo->op_nextop)); - else - PerlIO_printf(file, "DONE\n"); - Perl_dump_indent(aTHX_ level, file, "LAST ===> "); - if (cLOOPo->op_lastop) - PerlIO_printf(file, "%" UVuf "\n", sequence_num(cLOOPo->op_lastop)); - else - PerlIO_printf(file, "DONE\n"); + S_opdump_indent(aTHX_ o, level, bar, file, "REDO"); + S_opdump_link(aTHX_ cLOOPo->op_redoop, file); + S_opdump_indent(aTHX_ o, level, bar, file, "NEXT"); + S_opdump_link(aTHX_ cLOOPo->op_nextop, file); + S_opdump_indent(aTHX_ o, level, bar, file, "LAST"); + S_opdump_link(aTHX_ cLOOPo->op_lastop, file); break; + + case OP_REGCOMP: + case OP_SUBSTCONT: case OP_COND_EXPR: case OP_RANGE: case OP_MAPWHILE: case OP_GREPWHILE: case OP_OR: + case OP_DOR: case OP_AND: - Perl_dump_indent(aTHX_ level, file, "OTHER ===> "); - if (cLOGOPo->op_other) - PerlIO_printf(file, "%" UVuf "\n", sequence_num(cLOGOPo->op_other)); - else - PerlIO_printf(file, "DONE\n"); + case OP_ORASSIGN: + case OP_DORASSIGN: + case OP_ANDASSIGN: + case OP_ARGDEFELEM: + case OP_ENTERGIVEN: + case OP_ENTERWHEN: + case OP_ENTERTRY: + case OP_ONCE: + S_opdump_indent(aTHX_ o, level, bar, file, "OTHER"); + S_opdump_link(aTHX_ cLOGOPo->op_other, file); break; case OP_SPLIT: case OP_MATCH: case OP_QR: case OP_SUBST: - do_pmop_dump(level, file, cPMOPo); + S_do_pmop_dump_bar(aTHX_ level, bar, file, cPMOPo); break; case OP_LEAVE: case OP_LEAVEEVAL: @@ -1060,19 +1216,55 @@ Perl_do_op_dump(pTHX_ I32 level, PerlIO *file, const OP *o) case OP_LEAVEWRITE: case OP_SCOPE: if (o->op_private & OPpREFCOUNTED) - Perl_dump_indent(aTHX_ level, file, "REFCNT = %" UVuf "\n", (UV)o->op_targ); + S_opdump_indent(aTHX_ o, level, bar, file, + "REFCNT = %" UVuf "\n", (UV)o->op_targ); break; + + case OP_DUMP: + case OP_GOTO: + case OP_NEXT: + case OP_LAST: + case OP_REDO: + if (o->op_flags & (OPf_SPECIAL|OPf_STACKED|OPf_KIDS)) + break; + /* FALLTHROUGH */ + case OP_TRANS: + case OP_TRANSR: + if ( (o->op_type == OP_TRANS || o->op_type == OP_TRANSR) + && (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))) + break; + + { + SV * const label = newSVpvs_flags("", SVs_TEMP); + generic_pv_escape(label, cPVOPo->op_pv, strlen(cPVOPo->op_pv), 0); + S_opdump_indent(aTHX_ o, level, bar, file, + "PV = \"%" SVf "\" (0x%" UVxf ")\n", + SVfARG(label), PTR2UV(cPVOPo->op_pv)); + } + + default: break; } if (o->op_flags & OPf_KIDS) { OP *kid; + level++; + bar <<= 1; for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid)) - do_op_dump(level, file, kid); + S_do_op_dump_bar(aTHX_ level, + (bar | cBOOL(OpHAS_SIBLING(kid))), + file, kid); } - Perl_dump_indent(aTHX_ level-1, file, "}\n"); } + +void +Perl_do_op_dump(pTHX_ I32 level, PerlIO *file, const OP *o) +{ + S_do_op_dump_bar(aTHX_ level, 0, file, o); +} + + /* =for apidoc op_dump @@ -1305,7 +1497,7 @@ Perl_do_gvgv_dump(pTHX_ I32 level, PerlIO *file, const char *name, GV *sv) const char *hvname; HV * const stash = GvSTASH(sv); PerlIO_printf(file, "\t"); - /* TODO might have an extra \" here */ + /* TODO might have an extra \" here */ if (stash && (hvname = HvNAME_get(stash))) { PerlIO_printf(file, "\"%s\" :: \"", generic_pv_escape(tmp, hvname, @@ -1700,7 +1892,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo (UV)aux->xhv_aux_flags); } Perl_dump_indent(aTHX_ level, file, " ARRAY = 0x%" UVxf, PTR2UV(HvARRAY(sv))); - usedkeys = HvUSEDKEYS(sv); + usedkeys = HvUSEDKEYS(MUTABLE_HV(sv)); if (HvARRAY(sv) && usedkeys) { /* Show distribution of HEs in the ARRAY */ int freq[200]; @@ -1801,8 +1993,8 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo { const char * const hvname = HvNAME_get(sv); if (hvname) { - SV* tmpsv = newSVpvs_flags("", SVs_TEMP); - Perl_dump_indent(aTHX_ level, file, " NAME = \"%s\"\n", + SV* tmpsv = newSVpvs_flags("", SVs_TEMP); + Perl_dump_indent(aTHX_ level, file, " NAME = \"%s\"\n", generic_pv_escape( tmpsv, hvname, HvNAMELEN(sv), HvNAMEUTF8(sv))); } @@ -1828,7 +2020,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo + (count < 0 ? -count : count); while (hekp < endp) { if (*hekp) { - SV *tmp = newSVpvs_flags("", SVs_TEMP); + SV *tmp = newSVpvs_flags("", SVs_TEMP); Perl_sv_catpvf(aTHX_ names, ", \"%s\"", generic_pv_escape(tmp, HEK_KEY(*hekp), HEK_LEN(*hekp), HEK_UTF8(*hekp))); } else { @@ -1938,14 +2130,14 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo case SVt_PVCV: if (CvAUTOLOAD(sv)) { SV* tmpsv = newSVpvs_flags("", SVs_TEMP); - STRLEN len; + STRLEN len; const char *const name = SvPV_const(sv, len); Perl_dump_indent(aTHX_ level, file, " AUTOLOAD = \"%s\"\n", generic_pv_escape(tmpsv, name, len, SvUTF8(sv))); } if (SvPOK(sv)) { - SV* tmpsv = newSVpvs_flags("", SVs_TEMP); - const char *const proto = CvPROTO(sv); + SV* tmpsv = newSVpvs_flags("", SVs_TEMP); + const char *const proto = CvPROTO(sv); Perl_dump_indent(aTHX_ level, file, " PROTOTYPE = \"%s\"\n", generic_pv_escape(tmpsv, proto, CvPROTOLEN(sv), SvUTF8(sv))); @@ -2040,13 +2232,13 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo if (isREGEXP(sv)) goto dumpregexp; if (!isGV_with_GP(sv)) break; - { - SV* tmpsv = newSVpvs_flags("", SVs_TEMP); - Perl_dump_indent(aTHX_ level, file, " NAME = \"%s\"\n", - generic_pv_escape(tmpsv, GvNAME(sv), - GvNAMELEN(sv), - GvNAMEUTF8(sv))); - } + { + SV* tmpsv = newSVpvs_flags("", SVs_TEMP); + Perl_dump_indent(aTHX_ level, file, " NAME = \"%s\"\n", + generic_pv_escape(tmpsv, GvNAME(sv), + GvNAMELEN(sv), + GvNAMEUTF8(sv))); + } Perl_dump_indent(aTHX_ level, file, " NAMELEN = %" IVdf "\n", (IV)GvNAMELEN(sv)); do_hv_dump (level, file, " GvSTASH", GvSTASH(sv)); Perl_dump_indent(aTHX_ level, file, " FLAGS = 0x%" UVxf "\n", (UV)GvFLAGS(sv)); @@ -2211,9 +2403,7 @@ For an example of its output, see L. void Perl_sv_dump(pTHX_ SV *sv) { - PERL_ARGS_ASSERT_SV_DUMP; - - if (SvROK(sv)) + if (sv && SvROK(sv)) do_sv_dump(0, Perl_debug_log, sv, 0, 4, 0, 0); else do_sv_dump(0, Perl_debug_log, sv, 0, 0, 0, 0); @@ -2521,22 +2711,8 @@ Perl_debop(pTHX_ const OP *o) break; case OP_GVSV: case OP_GV: - if (cGVOPo_gv && isGV(cGVOPo_gv)) { - SV * const sv = newSV(0); - gv_fullname3(sv, cGVOPo_gv, NULL); - PerlIO_printf(Perl_debug_log, "(%s)", SvPV_nolen_const(sv)); - SvREFCNT_dec_NN(sv); - } - else if (cGVOPo_gv) { - SV * const sv = newSV(0); - assert(SvROK(cGVOPo_gv)); - assert(SvTYPE(SvRV(cGVOPo_gv)) == SVt_PVCV); - PerlIO_printf(Perl_debug_log, "(cv ref: %s)", - SvPV_nolen_const(cv_name((CV *)SvRV(cGVOPo_gv),sv,0))); - SvREFCNT_dec_NN(sv); - } - else - PerlIO_printf(Perl_debug_log, "(NULL)"); + PerlIO_printf(Perl_debug_log, "(%" SVf ")", + SVfARG(S_gv_display(aTHX_ cGVOPo_gv))); break; case OP_PADSV: @@ -2563,6 +2739,154 @@ Perl_debop(pTHX_ const OP *o) return 0; } + +/* +=for apidoc op_class + +Given an op, determine what type of struct it has been allocated as. +Returns one of the OPclass enums, such as OPclass_LISTOP. + +=cut +*/ + + +OPclass +Perl_op_class(pTHX_ const OP *o) +{ + bool custom = 0; + + if (!o) + return OPclass_NULL; + + if (o->op_type == 0) { + if (o->op_targ == OP_NEXTSTATE || o->op_targ == OP_DBSTATE) + return OPclass_COP; + return (o->op_flags & OPf_KIDS) ? OPclass_UNOP : OPclass_BASEOP; + } + + if (o->op_type == OP_SASSIGN) + return ((o->op_private & OPpASSIGN_BACKWARDS) ? OPclass_UNOP : OPclass_BINOP); + + if (o->op_type == OP_AELEMFAST) { +#ifdef USE_ITHREADS + return OPclass_PADOP; +#else + return OPclass_SVOP; +#endif + } + +#ifdef USE_ITHREADS + if (o->op_type == OP_GV || o->op_type == OP_GVSV || + o->op_type == OP_RCATLINE) + return OPclass_PADOP; +#endif + + if (o->op_type == OP_CUSTOM) + custom = 1; + + switch (OP_CLASS(o)) { + case OA_BASEOP: + return OPclass_BASEOP; + + case OA_UNOP: + return OPclass_UNOP; + + case OA_BINOP: + return OPclass_BINOP; + + case OA_LOGOP: + return OPclass_LOGOP; + + case OA_LISTOP: + return OPclass_LISTOP; + + case OA_PMOP: + return OPclass_PMOP; + + case OA_SVOP: + return OPclass_SVOP; + + case OA_PADOP: + return OPclass_PADOP; + + case OA_PVOP_OR_SVOP: + /* + * Character translations (tr///) are usually a PVOP, keeping a + * pointer to a table of shorts used to look up translations. + * Under utf8, however, a simple table isn't practical; instead, + * the OP is an SVOP (or, under threads, a PADOP), + * and the SV is a reference to a swash + * (i.e., an RV pointing to an HV). + */ + return (!custom && + (o->op_private & (OPpTRANS_TO_UTF|OPpTRANS_FROM_UTF)) + ) +#if defined(USE_ITHREADS) + ? OPclass_PADOP : OPclass_PVOP; +#else + ? OPclass_SVOP : OPclass_PVOP; +#endif + + case OA_LOOP: + return OPclass_LOOP; + + case OA_COP: + return OPclass_COP; + + case OA_BASEOP_OR_UNOP: + /* + * UNI(OP_foo) in toke.c returns token UNI or FUNC1 depending on + * whether parens were seen. perly.y uses OPf_SPECIAL to + * signal whether a BASEOP had empty parens or none. + * Some other UNOPs are created later, though, so the best + * test is OPf_KIDS, which is set in newUNOP. + */ + return (o->op_flags & OPf_KIDS) ? OPclass_UNOP : OPclass_BASEOP; + + case OA_FILESTATOP: + /* + * The file stat OPs are created via UNI(OP_foo) in toke.c but use + * the OPf_REF flag to distinguish between OP types instead of the + * usual OPf_SPECIAL flag. As usual, if OPf_KIDS is set, then we + * return OPclass_UNOP so that walkoptree can find our children. If + * OPf_KIDS is not set then we check OPf_REF. Without OPf_REF set + * (no argument to the operator) it's an OP; with OPf_REF set it's + * an SVOP (and op_sv is the GV for the filehandle argument). + */ + return ((o->op_flags & OPf_KIDS) ? OPclass_UNOP : +#ifdef USE_ITHREADS + (o->op_flags & OPf_REF) ? OPclass_PADOP : OPclass_BASEOP); +#else + (o->op_flags & OPf_REF) ? OPclass_SVOP : OPclass_BASEOP); +#endif + case OA_LOOPEXOP: + /* + * next, last, redo, dump and goto use OPf_SPECIAL to indicate that a + * label was omitted (in which case it's a BASEOP) or else a term was + * seen. In this last case, all except goto are definitely PVOP but + * goto is either a PVOP (with an ordinary constant label), an UNOP + * with OPf_STACKED (with a non-constant non-sub) or an UNOP for + * OP_REFGEN (with goto &sub) in which case OPf_STACKED also seems to + * get set. + */ + if (o->op_flags & OPf_STACKED) + return OPclass_UNOP; + else if (o->op_flags & OPf_SPECIAL) + return OPclass_BASEOP; + else + return OPclass_PVOP; + case OA_METHOP: + return OPclass_METHOP; + case OA_UNOP_AUX: + return OPclass_UNOP_AUX; + } + Perl_warn(aTHX_ "Can't determine class of operator %s, assuming BASEOP\n", + OP_NAME(o)); + return OPclass_BASEOP; +} + + + STATIC CV* S_deb_curcv(pTHX_ I32 ix) { diff --git a/embed.fnc b/embed.fnc index 656afe5..89986b4 100644 --- a/embed.fnc +++ b/embed.fnc @@ -506,6 +506,7 @@ p |void |dump_all_perl |bool justperl Ap |void |dump_eval Ap |void |dump_form |NN const GV* gv Ap |void |gv_dump |NULLOK GV* gv +Apd |OPclass|op_class |NULLOK const OP *o Ap |void |op_dump |NN const OP *o Ap |void |pmop_dump |NULLOK PMOP* pm Ap |void |dump_packsubs |NN const HV* stash @@ -678,9 +679,9 @@ ApbmM |SV** |hv_store_flags |NULLOK HV *hv|NULLOK const char *key \ Amd |void |hv_undef |NULLOK HV *hv poX |void |hv_undef_flags |NULLOK HV *hv|U32 flags AmP |I32 |ibcmp |NN const char* a|NN const char* b|I32 len -AnpP |I32 |foldEQ |NN const char* a|NN const char* b|I32 len +Ainp |I32 |foldEQ |NN const char* a|NN const char* b|I32 len AmP |I32 |ibcmp_locale |NN const char* a|NN const char* b|I32 len -AnpP |I32 |foldEQ_locale |NN const char* a|NN const char* b|I32 len +Ainp |I32 |foldEQ_locale |NN const char* a|NN const char* b|I32 len Am |I32 |ibcmp_utf8 |NN const char *s1|NULLOK char **pe1|UV l1 \ |bool u1|NN const char *s2|NULLOK char **pe2 \ |UV l2|bool u2 @@ -690,7 +691,7 @@ Amd |I32 |foldEQ_utf8 |NN const char *s1|NULLOK char **pe1|UV l1 \ AMp |I32 |foldEQ_utf8_flags |NN const char *s1|NULLOK char **pe1|UV l1 \ |bool u1|NN const char *s2|NULLOK char **pe2 \ |UV l2|bool u2|U32 flags -AnpP |I32 |foldEQ_latin1 |NN const char* a|NN const char* b|I32 len +Ainp |I32 |foldEQ_latin1 |NN const char* a|NN const char* b|I32 len #if defined(PERL_IN_DOIO_C) sR |bool |ingroup |Gid_t testgid|bool effective #endif @@ -858,6 +859,7 @@ pP |I32 |keyword |NN const char *name|I32 len|bool all_keywords s |void |inplace_aassign |NN OP* o #endif Ap |void |leave_scope |I32 base +p |void |notify_parser_that_changed_to_utf8 : Public lexer API AMpd |void |lex_start |NULLOK SV* line|NULLOK PerlIO *rsfp|U32 flags AMpd |bool |lex_bufutf8 @@ -1526,7 +1528,7 @@ Apd |char* |sv_collxfrm_flags |NN SV *const sv|NN STRLEN *const nxp|I32 const fl Apd |int |getcwd_sv |NN SV* sv Apd |void |sv_dec |NULLOK SV *const sv Apd |void |sv_dec_nomg |NULLOK SV *const sv -Ap |void |sv_dump |NN SV* sv +Ap |void |sv_dump |NULLOK SV* sv ApdR |bool |sv_derived_from|NN SV* sv|NN const char *const name ApdR |bool |sv_derived_from_sv|NN SV* sv|NN SV *namesv|U32 flags ApdR |bool |sv_derived_from_pv|NN SV* sv|NN const char *const name|U32 flags @@ -1551,7 +1553,7 @@ Apmdb |void |sv_insert |NN SV *const bigstr|const STRLEN offset \ |const STRLEN len|NN const char *const little \ |const STRLEN littlelen Apd |void |sv_insert_flags|NN SV *const bigstr|const STRLEN offset|const STRLEN len \ - |NN const char *const little|const STRLEN littlelen|const U32 flags + |NN const char *little|const STRLEN littlelen|const U32 flags Apd |int |sv_isa |NULLOK SV* sv|NN const char *const name Apd |int |sv_isobject |NULLOK SV* sv Apd |STRLEN |sv_len |NULLOK SV *const sv @@ -1713,6 +1715,10 @@ ApdD |UV |to_utf8_case |NN const U8 *p \ |NN SV **swashp \ |NN const char *normal| \ NULLOK const char *special +ApM |char * |_byte_dump_string \ + |NN const U8 * s \ + |const STRLEN len \ + |const bool format #if defined(PERL_IN_UTF8_C) inR |bool |does_utf8_overflow|NN const U8 * const s|NN const U8 * e inR |bool |is_utf8_overlong_given_start_byte_ok|NN const U8 * const s|const STRLEN len @@ -1722,7 +1728,6 @@ sMR |char * |unexpected_non_continuation_text \ |STRLEN print_len \ |const STRLEN non_cont_byte_pos \ |const STRLEN expect_len -sM |char * |_byte_dump_string|NN const U8 * s|const STRLEN len s |void |warn_on_first_deprecated_use \ |NN const char * const name \ |NN const char * const alternative \ @@ -1798,7 +1803,7 @@ ApMd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *len ApdD |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen ApdD |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen ApMD |UV |valid_utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen -Amd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen +Aopd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen ApdD |UV |utf8_to_uvuni_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen pM |bool |check_utf8_print |NN const U8 *s|const STRLEN len @@ -1875,8 +1880,10 @@ inR |bool |should_warn_nl|NN const char *pv p |void |write_to_stderr|NN SV* msv : Used in op.c p |int |yyerror |NN const char *const s +p |void |yyquit +p |void |abort_execution|NN const char * const msg|NN const char * const name p |int |yyerror_pv |NN const char *const s|U32 flags -p |int |yyerror_pvn |NN const char *const s|STRLEN len|U32 flags +p |int |yyerror_pvn |NULLOK const char *const s|STRLEN len|U32 flags : Used in perly.y, and by Data::Alias EXp |int |yylex p |void |yyunlex @@ -2339,7 +2346,7 @@ Es |bool |grok_bslash_N |NN RExC_state_t *pRExC_state \ |const bool strict \ |const U32 depth Es |void |reginsert |NN RExC_state_t *pRExC_state \ - |U8 op|NN regnode *opnd|U32 depth + |U8 op|NN regnode *operand|U32 depth Es |void |regtail |NN RExC_state_t * pRExC_state \ |NN const regnode * const p \ |NN const regnode * const val \ @@ -2644,7 +2651,7 @@ sR |SV* |get_and_check_backslash_N_name|NN const char* s \ |NN const char* const e sR |char* |scan_formline |NN char *s sR |char* |scan_heredoc |NN char *s -s |char* |scan_ident |NN char *s|NN char *dest \ +s |char* |scan_ident |NN char *s|NN char *dest \ |STRLEN destlen|I32 ck_uni sR |char* |scan_inputsymbol|NN char *start sR |char* |scan_pat |NN char *start|I32 type diff --git a/embed.h b/embed.h index ba7b2ca..5b9c46c 100644 --- a/embed.h +++ b/embed.h @@ -27,6 +27,7 @@ /* Hide global symbols */ #define Gv_AMupdate(a,b) Perl_Gv_AMupdate(aTHX_ a,b) +#define _byte_dump_string(a,b,c) Perl__byte_dump_string(aTHX_ a,b,c) #define _force_out_malformed_utf8_message(a,b,c,d) Perl__force_out_malformed_utf8_message(aTHX_ a,b,c,d) #define _is_in_locale_category(a,b) Perl__is_in_locale_category(aTHX_ a,b) #define _is_uni_FOO(a,b) Perl__is_uni_FOO(aTHX_ a,b) @@ -434,6 +435,7 @@ #define nothreadhook() Perl_nothreadhook(aTHX) #define op_append_elem(a,b,c) Perl_op_append_elem(aTHX_ a,b,c) #define op_append_list(a,b,c) Perl_op_append_list(aTHX_ a,b,c) +#define op_class(a) Perl_op_class(aTHX_ a) #define op_contextualize(a,b) Perl_op_contextualize(aTHX_ a,b) #define op_convert_list(a,b,c) Perl_op_convert_list(aTHX_ a,b,c) #define op_dump(a) Perl_op_dump(aTHX_ a) @@ -1163,6 +1165,7 @@ #ifdef PERL_CORE #define Slab_Alloc(a) Perl_Slab_Alloc(aTHX_ a) #define Slab_Free(a) Perl_Slab_Free(aTHX_ a) +#define abort_execution(a,b) Perl_abort_execution(aTHX_ a,b) #define alloc_LOGOP(a,b,c) Perl_alloc_LOGOP(aTHX_ a,b,c) #define allocmy(a,b,c) Perl_allocmy(aTHX_ a,b,c) #define amagic_is_enabled(a) Perl_amagic_is_enabled(aTHX_ a) @@ -1350,6 +1353,7 @@ #define newXS_len_flags(a,b,c,d,e,f,g) Perl_newXS_len_flags(aTHX_ a,b,c,d,e,f,g) #define nextargv(a,b) Perl_nextargv(aTHX_ a,b) #define noperl_die Perl_noperl_die +#define notify_parser_that_changed_to_utf8() Perl_notify_parser_that_changed_to_utf8(aTHX) #define oopsAV(a) Perl_oopsAV(aTHX_ a) #define oopsHV(a) Perl_oopsHV(aTHX_ a) #define op_unscope(a) Perl_op_unscope(aTHX_ a) @@ -1413,6 +1417,7 @@ #define yyerror_pv(a,b) Perl_yyerror_pv(aTHX_ a,b) #define yyerror_pvn(a,b,c) Perl_yyerror_pvn(aTHX_ a,b,c) #define yyparse(a) Perl_yyparse(aTHX_ a) +#define yyquit() Perl_yyquit(aTHX) #define yyunlex() Perl_yyunlex(aTHX) # if !(defined(DEBUGGING)) # if !defined(NV_PRESERVES_UV) @@ -1835,7 +1840,6 @@ #define isa_lookup(a,b,c,d) S_isa_lookup(aTHX_ a,b,c,d) # endif # if defined(PERL_IN_UTF8_C) -#define _byte_dump_string(a,b) S__byte_dump_string(aTHX_ a,b) #define _to_utf8_case(a,b,c,d,e,f,g) S__to_utf8_case(aTHX_ a,b,c,d,e,f,g) #define check_and_deprecate(a,b,c,d,e,f) S_check_and_deprecate(aTHX_ a,b,c,d,e,f) #define check_locale_boundary_crossing(a,b,c,d) S_check_locale_boundary_crossing(aTHX_ a,b,c,d) diff --git a/ext/B/B.pm b/ext/B/B.pm index e0f9e21..5ea96fa 100644 --- a/ext/B/B.pm +++ b/ext/B/B.pm @@ -15,7 +15,7 @@ require Exporter; # walkoptree comes from B.xs BEGIN { - $B::VERSION = '1.65'; + $B::VERSION = '1.68'; @B::EXPORT_OK = (); # Our BOOT code needs $VERSION set, and will append to @EXPORT_OK. diff --git a/ext/B/B.xs b/ext/B/B.xs index 2279f36..f6fdd1e 100644 --- a/ext/B/B.xs +++ b/ext/B/B.xs @@ -39,22 +39,6 @@ static const char* const svclassnames[] = { "B::IO", }; -typedef enum { - OPc_NULL, /* 0 */ - OPc_BASEOP, /* 1 */ - OPc_UNOP, /* 2 */ - OPc_BINOP, /* 3 */ - OPc_LOGOP, /* 4 */ - OPc_LISTOP, /* 5 */ - OPc_PMOP, /* 6 */ - OPc_SVOP, /* 7 */ - OPc_PADOP, /* 8 */ - OPc_PVOP, /* 9 */ - OPc_LOOP, /* 10 */ - OPc_COP, /* 11 */ - OPc_METHOP, /* 12 */ - OPc_UNOP_AUX /* 13 */ -} opclass; static const char* const opclassnames[] = { "B::NULL", @@ -113,146 +97,12 @@ static void B_init_my_cxt(pTHX_ my_cxt_t * cxt) { cxt->x_specialsv_list[6] = (SV *) pWARN_STD; } -static opclass -cc_opclass(pTHX_ const OP *o) -{ - bool custom = 0; - - if (!o) - return OPc_NULL; - - if (o->op_type == 0) { - if (o->op_targ == OP_NEXTSTATE || o->op_targ == OP_DBSTATE) - return OPc_COP; - return (o->op_flags & OPf_KIDS) ? OPc_UNOP : OPc_BASEOP; - } - - if (o->op_type == OP_SASSIGN) - return ((o->op_private & OPpASSIGN_BACKWARDS) ? OPc_UNOP : OPc_BINOP); - - if (o->op_type == OP_AELEMFAST) { -#ifdef USE_ITHREADS - return OPc_PADOP; -#else - return OPc_SVOP; -#endif - } - -#ifdef USE_ITHREADS - if (o->op_type == OP_GV || o->op_type == OP_GVSV || - o->op_type == OP_RCATLINE) - return OPc_PADOP; -#endif - - if (o->op_type == OP_CUSTOM) - custom = 1; - - switch (OP_CLASS(o)) { - case OA_BASEOP: - return OPc_BASEOP; - - case OA_UNOP: - return OPc_UNOP; - - case OA_BINOP: - return OPc_BINOP; - - case OA_LOGOP: - return OPc_LOGOP; - - case OA_LISTOP: - return OPc_LISTOP; - - case OA_PMOP: - return OPc_PMOP; - - case OA_SVOP: - return OPc_SVOP; - - case OA_PADOP: - return OPc_PADOP; - - case OA_PVOP_OR_SVOP: - /* - * Character translations (tr///) are usually a PVOP, keeping a - * pointer to a table of shorts used to look up translations. - * Under utf8, however, a simple table isn't practical; instead, - * the OP is an SVOP (or, under threads, a PADOP), - * and the SV is a reference to a swash - * (i.e., an RV pointing to an HV). - */ - return (!custom && - (o->op_private & (OPpTRANS_TO_UTF|OPpTRANS_FROM_UTF)) - ) -#if defined(USE_ITHREADS) - ? OPc_PADOP : OPc_PVOP; -#else - ? OPc_SVOP : OPc_PVOP; -#endif - - case OA_LOOP: - return OPc_LOOP; - - case OA_COP: - return OPc_COP; - - case OA_BASEOP_OR_UNOP: - /* - * UNI(OP_foo) in toke.c returns token UNI or FUNC1 depending on - * whether parens were seen. perly.y uses OPf_SPECIAL to - * signal whether a BASEOP had empty parens or none. - * Some other UNOPs are created later, though, so the best - * test is OPf_KIDS, which is set in newUNOP. - */ - return (o->op_flags & OPf_KIDS) ? OPc_UNOP : OPc_BASEOP; - - case OA_FILESTATOP: - /* - * The file stat OPs are created via UNI(OP_foo) in toke.c but use - * the OPf_REF flag to distinguish between OP types instead of the - * usual OPf_SPECIAL flag. As usual, if OPf_KIDS is set, then we - * return OPc_UNOP so that walkoptree can find our children. If - * OPf_KIDS is not set then we check OPf_REF. Without OPf_REF set - * (no argument to the operator) it's an OP; with OPf_REF set it's - * an SVOP (and op_sv is the GV for the filehandle argument). - */ - return ((o->op_flags & OPf_KIDS) ? OPc_UNOP : -#ifdef USE_ITHREADS - (o->op_flags & OPf_REF) ? OPc_PADOP : OPc_BASEOP); -#else - (o->op_flags & OPf_REF) ? OPc_SVOP : OPc_BASEOP); -#endif - case OA_LOOPEXOP: - /* - * next, last, redo, dump and goto use OPf_SPECIAL to indicate that a - * label was omitted (in which case it's a BASEOP) or else a term was - * seen. In this last case, all except goto are definitely PVOP but - * goto is either a PVOP (with an ordinary constant label), an UNOP - * with OPf_STACKED (with a non-constant non-sub) or an UNOP for - * OP_REFGEN (with goto &sub) in which case OPf_STACKED also seems to - * get set. - */ - if (o->op_flags & OPf_STACKED) - return OPc_UNOP; - else if (o->op_flags & OPf_SPECIAL) - return OPc_BASEOP; - else - return OPc_PVOP; - case OA_METHOP: - return OPc_METHOP; - case OA_UNOP_AUX: - return OPc_UNOP_AUX; - } - warn("can't determine class of operator %s, assuming BASEOP\n", - OP_NAME(o)); - return OPc_BASEOP; -} static SV * make_op_object(pTHX_ const OP *o) { SV *opsv = sv_newmortal(); - sv_setiv(newSVrv(opsv, opclassnames[cc_opclass(aTHX_ o)]), PTR2IV(o)); + sv_setiv(newSVrv(opsv, opclassnames[op_class(o)]), PTR2IV(o)); return opsv; } @@ -509,7 +359,7 @@ walkoptree(pTHX_ OP *o, const char *method, SV *ref) dSP; OP *kid; SV *object; - const char *const classname = opclassnames[cc_opclass(aTHX_ o)]; + const char *const classname = opclassnames[op_class(o)]; dMY_CXT; /* Check that no-one has changed our reference, or is holding a reference @@ -542,7 +392,7 @@ walkoptree(pTHX_ OP *o, const char *method, SV *ref) ref = walkoptree(aTHX_ kid, method, ref); } } - if (o && (cc_opclass(aTHX_ o) == OPc_PMOP) && o->op_type != OP_SPLIT + if (o && (op_class(o) == OPclass_PMOP) && o->op_type != OP_SPLIT && (kid = PMOP_pmreplroot(cPMOPo))) { ref = walkoptree(aTHX_ kid, method, ref); @@ -1083,7 +933,7 @@ next(o) : &PL_sv_undef); break; case 26: /* B::OP::size */ - ret = sv_2mortal(newSVuv((UV)(opsizes[cc_opclass(aTHX_ o)]))); + ret = sv_2mortal(newSVuv((UV)(opsizes[op_class(o)]))); break; case 27: /* B::OP::name */ case 28: /* B::OP::desc */ @@ -1941,7 +1791,7 @@ is_empty(gv) isGV_with_GP = 1 CODE: if (ix) { - RETVAL = isGV_with_GP(gv) ? TRUE : FALSE; + RETVAL = cBOOL(isGV_with_GP(gv)); } else { RETVAL = GvGP(gv) == Null(GP*); } @@ -2214,7 +2064,7 @@ SV* HASH(h) B::RHE h CODE: - RETVAL = newRV( (SV*)cophh_2hv(h, 0) ); + RETVAL = newRV_noinc( (SV*)cophh_2hv(h, 0) ); OUTPUT: RETVAL diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t index 4775c1c..2b1ed5d 100644 --- a/ext/Devel-Peek/t/Peek.t +++ b/ext/Devel-Peek/t/Peek.t @@ -1455,58 +1455,50 @@ for my $test ( local $TODO = 'This gets mangled by the current pipe implementation' if $^O eq 'VMS'; my $e = <<'EODUMP'; dumpindent is 4 at -e line 1. -{ -1 TYPE = leave ===> NULL - TARG = 1 - FLAGS = (VOID,KIDS,PARENS,SLABBED) - PRIVATE = (REFC) - REFCNT = 1 - { -2 TYPE = enter ===> 3 - FLAGS = (UNKNOWN,SLABBED,MORESIB) - } - { -3 TYPE = nextstate ===> 4 - FLAGS = (VOID,SLABBED,MORESIB) - LINE = 1 - PACKAGE = "t" - } - { -5 TYPE = entersub ===> 1 - TARG = 1 - FLAGS = (VOID,KIDS,STACKED,SLABBED) - PRIVATE = (TARG) - { -6 TYPE = null ===> (5) - (was list) - FLAGS = (UNKNOWN,KIDS,SLABBED) - { -4 TYPE = pushmark ===> 7 - FLAGS = (SCALAR,SLABBED,MORESIB) - } - { -8 TYPE = null ===> (6) - (was rv2cv) - FLAGS = (SCALAR,KIDS,SLABBED) - PRIVATE = (0x1) - { -7 TYPE = gv ===> 5 - FLAGS = (SCALAR,SLABBED) - GV_OR_PADIX - } - } - } - } -} + +1 leave LISTOP(0xNNN) ===> [0x0] + TARG = 1 + FLAGS = (VOID,KIDS,PARENS,SLABBED) + PRIVATE = (REFC) + REFCNT = 1 + | +2 +--enter OP(0xNNN) ===> 3 [nextstate 0xNNN] + | FLAGS = (UNKNOWN,SLABBED,MORESIB) + | +3 +--nextstate COP(0xNNN) ===> 4 [pushmark 0xNNN] + | FLAGS = (VOID,SLABBED,MORESIB) + | LINE = 1 + | PACKAGE = "t" + | | +5 +--entersub UNOP(0xNNN) ===> 1 [leave 0xNNN] + TARG = 1 + FLAGS = (VOID,KIDS,STACKED,SLABBED) + PRIVATE = (TARG) + | +6 +--null (ex-list) UNOP(0xNNN) ===> 5 [entersub 0xNNN] + FLAGS = (UNKNOWN,KIDS,SLABBED) + | +4 +--pushmark OP(0xNNN) ===> 7 [gv 0xNNN] + | FLAGS = (SCALAR,SLABBED,MORESIB) + | +8 +--null (ex-rv2cv) UNOP(0xNNN) ===> 6 [null 0xNNN] + FLAGS = (SCALAR,KIDS,SLABBED) + PRIVATE = (0x1) + | +7 +--gv SVOP(0xNNN) ===> 5 [entersub 0xNNN] + FLAGS = (SCALAR,SLABBED) + GV_OR_PADIX EODUMP - $e =~ s/GV_OR_PADIX/$threads ? "PADIX = 2" : "GV = t::DumpProg"/e; - $e =~ s/.*PRIVATE = \(0x1\).*\n// if $] < 5.021004; + $e =~ s/GV_OR_PADIX/$threads ? "PADIX = 2" : "GV = t::DumpProg (0xNNN)"/e; + $e =~ s/SVOP/PADOP/g if $threads; my $out = t::runperl switches => ['-Ilib'], prog => 'package t; use Devel::Peek q-DumpProg-; DumpProg();', stderr=>1; $out =~ s/ *SEQ = .*\n//; + $out =~ s/0x[0-9a-f]{2,}\]/${1}0xNNN]/g; + $out =~ s/\(0x[0-9a-f]{3,}\)/(0xNNN)/g; is $out, $e, "DumpProg() has no 'Attempt to free X prematurely' warning"; } done_testing(); diff --git a/ext/XS-APItest/t/handy0.t b/ext/XS-APItest/t/handy0.t new file mode 100644 index 0000000..7c3e4e3 --- /dev/null +++ b/ext/XS-APItest/t/handy0.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 0; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy1.t b/ext/XS-APItest/t/handy1.t new file mode 100644 index 0000000..2fd8ec1 --- /dev/null +++ b/ext/XS-APItest/t/handy1.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 1; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy2.t b/ext/XS-APItest/t/handy2.t new file mode 100644 index 0000000..2d4e78d --- /dev/null +++ b/ext/XS-APItest/t/handy2.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 2; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy3.t b/ext/XS-APItest/t/handy3.t new file mode 100644 index 0000000..fe07af3 --- /dev/null +++ b/ext/XS-APItest/t/handy3.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 3; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy4.t b/ext/XS-APItest/t/handy4.t new file mode 100644 index 0000000..08977a1 --- /dev/null +++ b/ext/XS-APItest/t/handy4.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 4; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy5.t b/ext/XS-APItest/t/handy5.t new file mode 100644 index 0000000..d2bb926 --- /dev/null +++ b/ext/XS-APItest/t/handy5.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 5; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy6.t b/ext/XS-APItest/t/handy6.t new file mode 100644 index 0000000..44fd1c6 --- /dev/null +++ b/ext/XS-APItest/t/handy6.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 6; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy7.t b/ext/XS-APItest/t/handy7.t new file mode 100644 index 0000000..c6c2d50 --- /dev/null +++ b/ext/XS-APItest/t/handy7.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 7; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy8.t b/ext/XS-APItest/t/handy8.t new file mode 100644 index 0000000..7e546d7 --- /dev/null +++ b/ext/XS-APItest/t/handy8.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 8; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy9.t b/ext/XS-APItest/t/handy9.t new file mode 100644 index 0000000..38d89c0 --- /dev/null +++ b/ext/XS-APItest/t/handy9.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 9; + +do './t/handy_base.pl'; diff --git a/ext/XS-APItest/t/handy.t b/ext/XS-APItest/t/handy_base.pl similarity index 97% rename from ext/XS-APItest/t/handy.t rename to ext/XS-APItest/t/handy_base.pl index 5ae97cd..676f7df 100644 --- a/ext/XS-APItest/t/handy.t +++ b/ext/XS-APItest/t/handy_base.pl @@ -166,7 +166,18 @@ my %utf8_param_code = ( "deprecated mathoms" => -2, ); +# This test is split into this number of files. +my $num_test_files = $ENV{TEST_JOBS} || 1; +$num_test_files = 10 if $num_test_files > 10; + +my $property_count = -1; foreach my $name (sort keys %properties, 'octal') { + + # We test every nth property in this run so that this test is split into + # smaller chunks to minimize test suite elapsed time when run in parallel. + $property_count++; + next if $property_count % $num_test_files != $::TEST_CHUNK; + my @invlist; if ($name eq 'octal') { # Hand-roll an inversion list with 0-7 in it and nothing else. @@ -389,8 +400,12 @@ my %to_properties = ( UPPER => 'Uppercase_Mapping', ); - +$property_count = -1; foreach my $name (sort keys %to_properties) { + + $property_count++; + next if $property_count % $num_test_files != $::TEST_CHUNK; + my $property = $to_properties{$name}; my ($list_ref, $map_ref, $format, $missing) = prop_invmap($property, ); diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index c7f2c1d..c7a032e 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -2,156 +2,47 @@ use strict; use Test::More; + +# This file tests various functions and macros in the API related to UTF-8. + +BEGIN { + use_ok('XS::APItest'); + require 'charset_tools.pl'; + require './t/utf8_setup.pl'; +}; + $|=1; no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit # machines, and that is tested elsewhere use XS::APItest; -use Data::Dumper; -my $pound_sign = chr utf8::unicode_to_native(163); - -sub isASCII { ord "A" == 65 } - -sub display_bytes { - use bytes; - my $string = shift; - return '"' - . join("", map { sprintf("\\x%02x", ord $_) } split "", $string) - . '"'; -} -sub output_warnings(@) { - diag "The warnings were:\n" . join("", @_); -} +my $pound_sign = chr utf8::unicode_to_native(163); # This test file can't use byte_utf8a_to_utf8n() from t/charset_tools.pl # because that uses the same functions we are testing here. So UTF-EBCDIC -# strings are hard-coded as I8 strings in this file instead, and we use array -# lookup to translate into the appropriate code page. - -my @i8_to_native = ( # Only code page 1047 so far. -# _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F -0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, -0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, -0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, -0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, -0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, -0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAD,0xE0,0xBD,0x5F,0x6D, -0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, -0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, -0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, -0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, -0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x51,0x52,0x53,0x54,0x55,0x56, -0x57,0x58,0x59,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x70,0x71,0x72,0x73, -0x74,0x75,0x76,0x77,0x78,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B,0x9C, -0x9D,0x9E,0x9F,0xA0,0xAA,0xAB,0xAC,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6, -0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, -0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE, -); - -my @native_to_i8; -for (my $i = 0; $i < 256; $i++) { - $native_to_i8[$i8_to_native[$i]] = $i; -} - -*I8_to_native = (isASCII) - ? sub { return shift } - : sub { return join "", map { chr $i8_to_native[ord $_] } - split "", shift }; -*native_to_I8 = (isASCII) - ? sub { return shift } - : sub { return join "", map { chr $native_to_i8[ord $_] } - split "", shift }; -sub start_byte_to_cont($) { - - # Extract the code point information from the input UTF-8 start byte, and - # return a continuation byte containing the same information. This is - # used in constructing an overlong malformation from valid input. - - my $byte = shift; - my $len = test_UTF8_SKIP($byte); - if ($len < 2) { - die "start_byte_to_cont() is expecting a UTF-8 variant"; - } - - $byte = ord native_to_I8($byte); - - # Copied from utf8.h. This gets rid of the leading 1 bits. - $byte &= ((($len) >= 7) ? 0x00 : (0x1F >> (($len)-2))); - - $byte |= (isASCII) ? 0x80 : 0xA0; - return I8_to_native(chr $byte); -} - -my $is64bit = length sprintf("%x", ~0) > 8; - - -# Test utf8n_to_uvchr_error(). These provide essentially complete code -# coverage. Copied from utf8.h -my $UTF8_ALLOW_EMPTY = 0x0001; -my $UTF8_GOT_EMPTY = $UTF8_ALLOW_EMPTY; -my $UTF8_ALLOW_CONTINUATION = 0x0002; -my $UTF8_GOT_CONTINUATION = $UTF8_ALLOW_CONTINUATION; -my $UTF8_ALLOW_NON_CONTINUATION = 0x0004; -my $UTF8_GOT_NON_CONTINUATION = $UTF8_ALLOW_NON_CONTINUATION; -my $UTF8_ALLOW_SHORT = 0x0008; -my $UTF8_GOT_SHORT = $UTF8_ALLOW_SHORT; -my $UTF8_ALLOW_LONG = 0x0010; -my $UTF8_ALLOW_LONG_AND_ITS_VALUE = $UTF8_ALLOW_LONG|0x0020; -my $UTF8_GOT_LONG = $UTF8_ALLOW_LONG; -my $UTF8_ALLOW_OVERFLOW = 0x0080; -my $UTF8_GOT_OVERFLOW = $UTF8_ALLOW_OVERFLOW; -my $UTF8_DISALLOW_SURROGATE = 0x0100; -my $UTF8_GOT_SURROGATE = $UTF8_DISALLOW_SURROGATE; -my $UTF8_WARN_SURROGATE = 0x0200; -my $UTF8_DISALLOW_NONCHAR = 0x0400; -my $UTF8_GOT_NONCHAR = $UTF8_DISALLOW_NONCHAR; -my $UTF8_WARN_NONCHAR = 0x0800; -my $UTF8_DISALLOW_SUPER = 0x1000; -my $UTF8_GOT_SUPER = $UTF8_DISALLOW_SUPER; -my $UTF8_WARN_SUPER = 0x2000; -my $UTF8_DISALLOW_ABOVE_31_BIT = 0x4000; -my $UTF8_GOT_ABOVE_31_BIT = $UTF8_DISALLOW_ABOVE_31_BIT; -my $UTF8_WARN_ABOVE_31_BIT = 0x8000; -my $UTF8_CHECK_ONLY = 0x10000; -my $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE - = $UTF8_DISALLOW_SUPER|$UTF8_DISALLOW_SURROGATE; -my $UTF8_DISALLOW_ILLEGAL_INTERCHANGE - = $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE|$UTF8_DISALLOW_NONCHAR; -my $UTF8_WARN_ILLEGAL_C9_INTERCHANGE - = $UTF8_WARN_SUPER|$UTF8_WARN_SURROGATE; -my $UTF8_WARN_ILLEGAL_INTERCHANGE - = $UTF8_WARN_ILLEGAL_C9_INTERCHANGE|$UTF8_WARN_NONCHAR; - -# Test uvchr_to_utf8(). -my $UNICODE_WARN_SURROGATE = 0x0001; -my $UNICODE_WARN_NONCHAR = 0x0002; -my $UNICODE_WARN_SUPER = 0x0004; -my $UNICODE_WARN_ABOVE_31_BIT = 0x0008; -my $UNICODE_DISALLOW_SURROGATE = 0x0010; -my $UNICODE_DISALLOW_NONCHAR = 0x0020; -my $UNICODE_DISALLOW_SUPER = 0x0040; -my $UNICODE_DISALLOW_ABOVE_31_BIT = 0x0080; +# strings are hard-coded as I8 strings in this file instead, and we use the +# translation functions to/from I8 from that file instead. my $look_for_everything_utf8n_to - = $UTF8_DISALLOW_SURROGATE - | $UTF8_WARN_SURROGATE - | $UTF8_DISALLOW_NONCHAR - | $UTF8_WARN_NONCHAR - | $UTF8_DISALLOW_SUPER - | $UTF8_WARN_SUPER - | $UTF8_DISALLOW_ABOVE_31_BIT - | $UTF8_WARN_ABOVE_31_BIT; + = $::UTF8_DISALLOW_SURROGATE + | $::UTF8_WARN_SURROGATE + | $::UTF8_DISALLOW_NONCHAR + | $::UTF8_WARN_NONCHAR + | $::UTF8_DISALLOW_SUPER + | $::UTF8_WARN_SUPER + | $::UTF8_DISALLOW_ABOVE_31_BIT + | $::UTF8_WARN_ABOVE_31_BIT; my $look_for_everything_uvchr_to - = $UNICODE_DISALLOW_SURROGATE - | $UNICODE_WARN_SURROGATE - | $UNICODE_DISALLOW_NONCHAR - | $UNICODE_WARN_NONCHAR - | $UNICODE_DISALLOW_SUPER - | $UNICODE_WARN_SUPER - | $UNICODE_DISALLOW_ABOVE_31_BIT - | $UNICODE_WARN_ABOVE_31_BIT; + = $::UNICODE_DISALLOW_SURROGATE + | $::UNICODE_WARN_SURROGATE + | $::UNICODE_DISALLOW_NONCHAR + | $::UNICODE_WARN_NONCHAR + | $::UNICODE_DISALLOW_SUPER + | $::UNICODE_WARN_SUPER + | $::UNICODE_DISALLOW_ABOVE_31_BIT + | $::UNICODE_WARN_ABOVE_31_BIT; foreach ([0, '', '', 'empty'], [0, 'N', 'N', '1 char'], @@ -501,7 +392,7 @@ my %code_points = ( : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), ); -if ($is64bit) { +if ($::is64bit) { no warnings qw(overflow portable); $code_points{0x100000000} = (isASCII) @@ -558,21 +449,17 @@ while ($cp < ((isASCII) ? 128 : 160)) { # This is from the definition of # continuation bytes can be in, and what the lowest start byte can be. So we # cycle through them. -my $first_continuation = (isASCII) ? 0x80 : 0xA0; my $final_continuation = 0xBF; my $start = (isASCII) ? 0xC2 : 0xC5; -my $max_bytes = (isASCII) ? 13 : 14; # Max number of bytes in a UTF-8 sequence - # representing a single code point - -my $continuation = $first_continuation - 1; +my $continuation = $::first_continuation - 1; while ($cp < 255) { if (++$continuation > $final_continuation) { # Wrap to the next start byte when we reach the final continuation # byte possible - $continuation = $first_continuation; + $continuation = $::first_continuation; $start++; } $code_points{$cp} = I8_to_native(chr($start) . chr($continuation)); @@ -613,8 +500,8 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } $u < 0x10000 ? 3 : $u < 0x200000 ? 4 : $u < 0x4000000 ? 5 : - $u < 0x80000000 ? 6 : (($is64bit) - ? ($u < 0x1000000000 ? 7 : $max_bytes) + $u < 0x80000000 ? 6 : (($::is64bit) + ? ($u < 0x1000000000 ? 7 : $::max_bytes) : 7) ) : ($u < 0xA0 ? 1 : @@ -623,7 +510,7 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } $u < 0x40000 ? 4 : $u < 0x400000 ? 5 : $u < 0x4000000 ? 6 : - $u < 0x40000000 ? 7 : $max_bytes ); + $u < 0x40000000 ? 7 : $::max_bytes ); } # If this test fails, subsequent ones are meaningless. @@ -735,21 +622,21 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } my $valid_under_c9strict = 1; my $valid_for_fits_in_31_bits = 1; if ($n > 0x10FFFF) { - $this_utf8_flags &= ~($UTF8_DISALLOW_SUPER|$UTF8_WARN_SUPER); + $this_utf8_flags &= ~($::UTF8_DISALLOW_SUPER|$::UTF8_WARN_SUPER); $valid_under_strict = 0; $valid_under_c9strict = 0; if ($n > 2 ** 31 - 1) { $this_utf8_flags &= - ~($UTF8_DISALLOW_ABOVE_31_BIT|$UTF8_WARN_ABOVE_31_BIT); + ~($::UTF8_DISALLOW_ABOVE_31_BIT|$::UTF8_WARN_ABOVE_31_BIT); $valid_for_fits_in_31_bits = 0; } } elsif (($n >= 0xFDD0 && $n <= 0xFDEF) || ($n & 0xFFFE) == 0xFFFE) { - $this_utf8_flags &= ~($UTF8_DISALLOW_NONCHAR|$UTF8_WARN_NONCHAR); + $this_utf8_flags &= ~($::UTF8_DISALLOW_NONCHAR|$::UTF8_WARN_NONCHAR); $valid_under_strict = 0; } elsif ($n >= 0xD800 && $n <= 0xDFFF) { - $this_utf8_flags &= ~($UTF8_DISALLOW_SURROGATE|$UTF8_WARN_SURROGATE); + $this_utf8_flags &= ~($::UTF8_DISALLOW_SURROGATE|$::UTF8_WARN_SURROGATE); $valid_under_c9strict = 0; $valid_under_strict = 0; } @@ -839,7 +726,7 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } undef @warnings; $ret = test_isUTF8_CHAR_flags($bytes, $len, - $UTF8_DISALLOW_ILLEGAL_INTERCHANGE); + $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE); is($ret, $expected_len, "Verify isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')" . " acts like isSTRICT_UTF8_CHAR"); @@ -872,7 +759,7 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } undef @warnings; $ret = test_isUTF8_CHAR_flags($bytes, $len, - $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); + $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); is($ret, $expected_len, "Verify isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')" ." acts like isC9_STRICT_UTF8_CHAR"); @@ -897,17 +784,17 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } my $this_uvchr_flags = $look_for_everything_uvchr_to; if ($n > 2 ** 31 - 1) { $this_uvchr_flags &= - ~($UNICODE_DISALLOW_ABOVE_31_BIT|$UNICODE_WARN_ABOVE_31_BIT); + ~($::UNICODE_DISALLOW_ABOVE_31_BIT|$::UNICODE_WARN_ABOVE_31_BIT); } if ($n > 0x10FFFF) { - $this_uvchr_flags &= ~($UNICODE_DISALLOW_SUPER|$UNICODE_WARN_SUPER); + $this_uvchr_flags &= ~($::UNICODE_DISALLOW_SUPER|$::UNICODE_WARN_SUPER); } elsif (($n >= 0xFDD0 && $n <= 0xFDEF) || ($n & 0xFFFE) == 0xFFFE) { - $this_uvchr_flags &= ~($UNICODE_DISALLOW_NONCHAR|$UNICODE_WARN_NONCHAR); + $this_uvchr_flags &= ~($::UNICODE_DISALLOW_NONCHAR|$::UNICODE_WARN_NONCHAR); } elsif ($n >= 0xD800 && $n <= 0xDFFF) { $this_uvchr_flags - &= ~($UNICODE_DISALLOW_SURROGATE|$UNICODE_WARN_SURROGATE); + &= ~($::UNICODE_DISALLOW_SURROGATE|$::UNICODE_WARN_SURROGATE); } $display_flags = sprintf "0x%x", $this_uvchr_flags; @@ -1115,13 +1002,13 @@ for my $restriction (sort keys %restriction_types) { $this_name .= "($restriction)"; if ($restriction eq "c9strict") { $test - .= ", $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE"; + .= ", $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE"; } elsif ($restriction eq "strict") { - $test .= ", $UTF8_DISALLOW_ILLEGAL_INTERCHANGE"; + $test .= ", $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE"; } elsif ($restriction eq "fits_in_31_bits") { - $test .= ", $UTF8_DISALLOW_ABOVE_31_BIT"; + $test .= ", $::UTF8_DISALLOW_ABOVE_31_BIT"; } else { fail("Internal test error: Unknown restriction " @@ -1195,1420 +1082,6 @@ for my $restriction (sort keys %restriction_types) { } } -my $REPLACEMENT = 0xFFFD; - -# Now test the malformations. All these raise category utf8 warnings. -my @malformations = ( - # ($testname, $bytes, $length, $allow_flags, $expected_error_flags, - # $allowed_uv, $expected_len, $needed_to_discern_len, $message ) - -# Now considered a program bug, and asserted against - #[ "zero length string malformation", "", 0, - # $UTF8_ALLOW_EMPTY, $UTF8_GOT_EMPTY, $REPLACEMENT, 0, 0, - # qr/empty string/ - #], - [ "orphan continuation byte malformation", I8_to_native("${I8c}a"), 2, - $UTF8_ALLOW_CONTINUATION, $UTF8_GOT_CONTINUATION, $REPLACEMENT, - 1, 1, - qr/unexpected continuation byte/ - ], - [ "premature next character malformation (immediate)", - (isASCII) ? "\xc2\xc2\x80" : I8_to_native("\xc5\xc5\xa0"), - 3, - $UTF8_ALLOW_NON_CONTINUATION, $UTF8_GOT_NON_CONTINUATION, $REPLACEMENT, - 1, 2, - qr/unexpected non-continuation byte.*immediately after start byte/ - ], - [ "premature next character malformation (non-immediate)", - I8_to_native("\xef${I8c}a"), 3, - $UTF8_ALLOW_NON_CONTINUATION, $UTF8_GOT_NON_CONTINUATION, $REPLACEMENT, - 2, 3, - qr/unexpected non-continuation byte .* 2 bytes after start byte/ - ], - [ "too short malformation", I8_to_native("\xf1${I8c}a"), 2, - # Having the 'a' after this, but saying there are only 2 bytes also - # tests that we pay attention to the passed in length - $UTF8_ALLOW_SHORT, $UTF8_GOT_SHORT, $REPLACEMENT, - 2, 2, - qr/2 bytes available, need 4/ - ], - [ "overlong malformation, lowest 2-byte", - (isASCII) ? "\xc0\x80" : I8_to_native("\xc0\xa0"), - 2, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - 0, # NUL - 2, 1, - qr/overlong/ - ], - [ "overlong malformation, highest 2-byte", - (isASCII) ? "\xc1\xbf" : I8_to_native("\xc4\xbf"), - 2, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - (isASCII) ? 0x7F : utf8::unicode_to_native(0x9F), - 2, 1, - qr/overlong/ - ], - [ "overlong malformation, lowest 3-byte", - (isASCII) ? "\xe0\x80\x80" : I8_to_native("\xe0\xa0\xa0"), - 3, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - 0, # NUL - 3, (isASCII) ? 2 : 1, - qr/overlong/ - ], - [ "overlong malformation, highest 3-byte", - (isASCII) ? "\xe0\x9f\xbf" : I8_to_native("\xe0\xbf\xbf"), - 3, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - (isASCII) ? 0x7FF : 0x3FF, - 3, (isASCII) ? 2 : 1, - qr/overlong/ - ], - [ "overlong malformation, lowest 4-byte", - (isASCII) ? "\xf0\x80\x80\x80" : I8_to_native("\xf0\xa0\xa0\xa0"), - 4, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - 0, # NUL - 4, 2, - qr/overlong/ - ], - [ "overlong malformation, highest 4-byte", - (isASCII) ? "\xf0\x8F\xbf\xbf" : I8_to_native("\xf0\xaf\xbf\xbf"), - 4, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - (isASCII) ? 0xFFFF : 0x3FFF, - 4, 2, - qr/overlong/ - ], - [ "overlong malformation, lowest 5-byte", - (isASCII) - ? "\xf8\x80\x80\x80\x80" - : I8_to_native("\xf8\xa0\xa0\xa0\xa0"), - 5, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - 0, # NUL - 5, 2, - qr/overlong/ - ], - [ "overlong malformation, highest 5-byte", - (isASCII) - ? "\xf8\x87\xbf\xbf\xbf" - : I8_to_native("\xf8\xa7\xbf\xbf\xbf"), - 5, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - (isASCII) ? 0x1FFFFF : 0x3FFFF, - 5, 2, - qr/overlong/ - ], - [ "overlong malformation, lowest 6-byte", - (isASCII) - ? "\xfc\x80\x80\x80\x80\x80" - : I8_to_native("\xfc\xa0\xa0\xa0\xa0\xa0"), - 6, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - 0, # NUL - 6, 2, - qr/overlong/ - ], - [ "overlong malformation, highest 6-byte", - (isASCII) - ? "\xfc\x83\xbf\xbf\xbf\xbf" - : I8_to_native("\xfc\xa3\xbf\xbf\xbf\xbf"), - 6, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - (isASCII) ? 0x3FFFFFF : 0x3FFFFF, - 6, 2, - qr/overlong/ - ], - [ "overlong malformation, lowest 7-byte", - (isASCII) - ? "\xfe\x80\x80\x80\x80\x80\x80" - : I8_to_native("\xfe\xa0\xa0\xa0\xa0\xa0\xa0"), - 7, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - 0, # NUL - 7, 2, - qr/overlong/ - ], - [ "overlong malformation, highest 7-byte", - (isASCII) - ? "\xfe\x81\xbf\xbf\xbf\xbf\xbf" - : I8_to_native("\xfe\xa1\xbf\xbf\xbf\xbf\xbf"), - 7, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - (isASCII) ? 0x7FFFFFFF : 0x3FFFFFF, - 7, 2, - qr/overlong/ - ], -); - -if (isASCII && ! $is64bit) { # 32-bit ASCII platform - no warnings 'portable'; - push @malformations, - [ "overflow malformation", - "\xfe\x84\x80\x80\x80\x80\x80", # Represents 2**32 - 7, - $UTF8_ALLOW_OVERFLOW, $UTF8_GOT_OVERFLOW, - $REPLACEMENT, - 7, 2, - qr/overflows/ - ], - [ "overflow malformation", - "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80", - $max_bytes, - $UTF8_ALLOW_OVERFLOW, $UTF8_GOT_OVERFLOW, - $REPLACEMENT, - $max_bytes, 1, - qr/overflows/ - ]; -} -else { # 64-bit ASCII, or EBCDIC of any size. - # On EBCDIC platforms, another overlong test is needed even on 32-bit - # systems, whereas it doesn't happen on ASCII except on 64-bit ones. - - no warnings 'portable'; - no warnings 'overflow'; # Doesn't run on 32-bit systems, but compiles - push @malformations, - [ "overlong malformation, lowest max-byte", - (isASCII) - ? "\xff\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - $max_bytes, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - 0, # NUL - $max_bytes, (isASCII) ? 7 : 8, - qr/overlong/, - ], - [ "overlong malformation, highest max-byte", - (isASCII) # 2**36-1 on ASCII; 2**30-1 on EBCDIC - ? "\xff\x80\x80\x80\x80\x80\x80\xbf\xbf\xbf\xbf\xbf\xbf" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xbf\xbf\xbf\xbf\xbf\xbf"), - $max_bytes, - $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, - (isASCII) ? 0xFFFFFFFFF : 0x3FFFFFFF, - $max_bytes, (isASCII) ? 7 : 8, - qr/overlong/, - ]; - - if (! $is64bit) { # 32-bit EBCDIC - push @malformations, - [ "overflow malformation", - I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"), - $max_bytes, - $UTF8_ALLOW_OVERFLOW, $UTF8_GOT_OVERFLOW, - $REPLACEMENT, - $max_bytes, 8, - qr/overflows/ - ]; - } - else { # 64-bit, either ASCII or EBCDIC - push @malformations, - [ "overflow malformation", - (isASCII) - ? "\xff\x80\x90\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0" - : I8_to_native( - "\xff\xb0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - $max_bytes, - $UTF8_ALLOW_OVERFLOW, $UTF8_GOT_OVERFLOW, - $REPLACEMENT, - $max_bytes, (isASCII) ? 3 : 2, - qr/overflows/ - ]; - } -} - -# For each overlong malformation in the list, we modify it, so that there are -# two tests. The first one returns the replacement character given the input -# flags, and the second test adds a flag that causes the actual code point the -# malformation represents to be returned. -my @added_overlongs; -foreach my $test (@malformations) { - my ($testname, $bytes, $length, $allow_flags, $expected_error_flags, - $allowed_uv, $expected_len, $needed_to_discern_len, $message ) = @$test; - next unless $testname =~ /overlong/; - - $test->[0] .= "; use REPLACEMENT CHAR"; - $test->[5] = $REPLACEMENT; - - push @added_overlongs, - [ $testname . "; use actual value", - $bytes, $length, - $allow_flags | $UTF8_ALLOW_LONG_AND_ITS_VALUE, - $expected_error_flags, $allowed_uv, $expected_len, - $needed_to_discern_len, $message - ]; -} -push @malformations, @added_overlongs; - -foreach my $test (@malformations) { - my ($testname, $bytes, $length, $allow_flags, $expected_error_flags, - $allowed_uv, $expected_len, $needed_to_discern_len, $message ) = @$test; - - if (length($bytes) < $length) { - fail("Internal test error: actual buffer length (" . length($bytes) - . ") must be at least as high as how far we are allowed to read" - . " into it ($length)"); - diag($testname); - next; - } - - undef @warnings; - - my $ret = test_isUTF8_CHAR($bytes, $length); - is($ret, 0, "$testname: isUTF8_CHAR returns 0"); - is(scalar @warnings, 0, "$testname: isUTF8_CHAR() generated no warnings") - or output_warnings(@warnings); - - undef @warnings; - - $ret = test_isUTF8_CHAR_flags($bytes, $length, 0); - is($ret, 0, "$testname: isUTF8_CHAR_flags returns 0"); - is(scalar @warnings, 0, "$testname: isUTF8_CHAR_flags() generated no" - . " warnings") - or output_warnings(@warnings); - - $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); - is($ret, 0, "$testname: isSTRICT_UTF8_CHAR returns 0"); - is(scalar @warnings, 0, - "$testname: isSTRICT_UTF8_CHAR() generated no warnings") - or output_warnings(@warnings); - - $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); - is($ret, 0, "$testname: isC9_STRICT_UTF8_CHAR returns 0"); - is(scalar @warnings, 0, - "$testname: isC9_STRICT_UTF8_CHAR() generated no warnings") - or output_warnings(@warnings); - - for my $j (1 .. $length - 1) { - my $partial = substr($bytes, 0, $j); - - undef @warnings; - - $ret = test_is_utf8_valid_partial_char_flags($bytes, $j, 0); - - my $ret_should_be = 0; - my $comment = ""; - if ($j < $needed_to_discern_len) { - $ret_should_be = 1; - $comment = ", but need $needed_to_discern_len bytes to discern:"; - } - - is($ret, $ret_should_be, "$testname: is_utf8_valid_partial_char_flags(" - . display_bytes($partial) - . ")$comment returns $ret_should_be"); - is(scalar @warnings, 0, - "$testname: is_utf8_valid_partial_char_flags() generated" - . " no warnings") - or output_warnings(@warnings); - } - - - # Test what happens when this malformation is not allowed - undef @warnings; - my $ret_ref = test_utf8n_to_uvchr_error($bytes, $length, 0); - is($ret_ref->[0], 0, "$testname: disallowed: Returns 0"); - is($ret_ref->[1], $expected_len, - "$testname: utf8n_to_uvchr_error(), disallowed: Returns expected" - . " length: $expected_len"); - if (is(scalar @warnings, 1, - "$testname: disallowed: Got a single warning ")) - { - like($warnings[0], $message, - "$testname: disallowed: Got expected warning"); - } - else { - if (scalar @warnings) { - output_warnings(@warnings); - } - } - is($ret_ref->[2], $expected_error_flags, - "$testname: utf8n_to_uvchr_error(), disallowed:" - . " Returns expected error"); - - { # Next test when disallowed, and warnings are off. - undef @warnings; - no warnings 'utf8'; - my $ret_ref = test_utf8n_to_uvchr_error($bytes, $length, 0); - is($ret_ref->[0], 0, - "$testname: utf8n_to_uvchr_error(), disallowed: no warnings 'utf8':" - . " Returns 0"); - is($ret_ref->[1], $expected_len, - "$testname: utf8n_to_uvchr_error(), disallowed: no warnings 'utf8':" - . " Returns expected length: $expected_len"); - if (!is(scalar @warnings, 0, - "$testname: utf8n_to_uvchr_error(), disallowed: no warnings 'utf8':" - . " no warnings generated")) - { - output_warnings(@warnings); - } - is($ret_ref->[2], $expected_error_flags, - "$testname: utf8n_to_uvchr_error(), disallowed: Returns" - . " expected error"); - } - - # Test with CHECK_ONLY - undef @warnings; - $ret_ref = test_utf8n_to_uvchr_error($bytes, $length, $UTF8_CHECK_ONLY); - is($ret_ref->[0], 0, "$testname: CHECK_ONLY: Returns 0"); - is($ret_ref->[1], -1, "$testname: CHECK_ONLY: returns -1 for length"); - if (! is(scalar @warnings, 0, - "$testname: CHECK_ONLY: no warnings generated")) - { - output_warnings(@warnings); - } - is($ret_ref->[2], $expected_error_flags, - "$testname: utf8n_to_uvchr_error(), disallowed: Returns expected" - . " error"); - - next if $allow_flags == 0; # Skip if can't allow this malformation - - # Test when the malformation is allowed - undef @warnings; - $ret_ref = test_utf8n_to_uvchr_error($bytes, $length, $allow_flags); - is($ret_ref->[0], $allowed_uv, - "$testname: utf8n_to_uvchr_error(), allowed: Returns expected uv: " - . sprintf("0x%04X", $allowed_uv)); - is($ret_ref->[1], $expected_len, - "$testname: utf8n_to_uvchr_error(), allowed: Returns expected length:" - . " $expected_len"); - if (!is(scalar @warnings, 0, - "$testname: utf8n_to_uvchr_error(), allowed: no warnings" - . " generated")) - { - output_warnings(@warnings); - } - is($ret_ref->[2], $expected_error_flags, - "$testname: utf8n_to_uvchr_error(), disallowed: Returns" - . " expected error"); -} - -sub nonportable_regex ($) { - - # Returns a pattern that matches the non-portable message raised either - # for the specific input code point, or the one generated when there - # is some malformation that precludes the message containing the specific - # code point - - my $code_point = shift; - - my $string = sprintf '(Code point 0x%X is not Unicode, and' - . '|Any UTF-8 sequence that starts with' - . ' "(\\\x[[:xdigit:]]{2})+" is for a' - . ' non-Unicode code point, and is) not portable', - $code_point; - return qr/$string/; -} - -# Now test the cases where a legal code point is generated, but may or may not -# be allowed/warned on. -my @tests = ( - # ($testname, $bytes, $warn_flags, $disallow_flags, $expected_error_flags, - # $category, $allowed_uv, $expected_len, $needed_to_discern_len, $message ) - [ "lowest surrogate", - (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"), - $UTF8_WARN_SURROGATE, $UTF8_DISALLOW_SURROGATE, $UTF8_GOT_SURROGATE, - 'surrogate', 0xD800, - (isASCII) ? 3 : 4, - 2, - qr/surrogate/ - ], - [ "a middle surrogate", - (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"), - $UTF8_WARN_SURROGATE, $UTF8_DISALLOW_SURROGATE, $UTF8_GOT_SURROGATE, - 'surrogate', 0xD90D, - (isASCII) ? 3 : 4, - 2, - qr/surrogate/ - ], - [ "highest surrogate", - (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"), - $UTF8_WARN_SURROGATE, $UTF8_DISALLOW_SURROGATE, $UTF8_GOT_SURROGATE, - 'surrogate', 0xDFFF, - (isASCII) ? 3 : 4, - 2, - qr/surrogate/ - ], - [ "first non_unicode", - (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), - $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, $UTF8_GOT_SUPER, - 'non_unicode', 0x110000, - (isASCII) ? 4 : 5, - 2, - qr/(not Unicode|for a non-Unicode code point).* may not be portable/ - ], - [ "non_unicode whose first byte tells that", - (isASCII) ? "\xf5\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"), - $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, $UTF8_GOT_SUPER, - 'non_unicode', - (isASCII) ? 0x140000 : 0x200000, - (isASCII) ? 4 : 5, - 1, - qr/(not Unicode|for a non-Unicode code point).* may not be portable/ - ], - [ "first of 32 consecutive non-character code points", - (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xFDD0, - (isASCII) ? 3 : 4, - (isASCII) ? 3 : 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "a mid non-character code point of the 32 consecutive ones", - (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xFDE0, - (isASCII) ? 3 : 4, - (isASCII) ? 3 : 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "final of 32 consecutive non-character code points", - (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xFDEF, - (isASCII) ? 3 : 4, - (isASCII) ? 3 : 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+FFFE", - (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xFFFE, - (isASCII) ? 3 : 4, - (isASCII) ? 3 : 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+FFFF", - (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xFFFF, - (isASCII) ? 3 : 4, - (isASCII) ? 3 : 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+1FFFE", - (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x1FFFE, - 4, 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+1FFFF", - (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x1FFFF, - 4, 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+2FFFE", - (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x2FFFE, - 4, 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+2FFFF", - (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x2FFFF, - 4, 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+3FFFE", - (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x3FFFE, - 4, 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+3FFFF", - (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x3FFFF, - 4, 4, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+4FFFE", - (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x4FFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+4FFFF", - (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x4FFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+5FFFE", - (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x5FFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+5FFFF", - (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x5FFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+6FFFE", - (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x6FFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+6FFFF", - (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x6FFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+7FFFE", - (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x7FFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+7FFFF", - (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x7FFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+8FFFE", - (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x8FFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+8FFFF", - (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x8FFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+9FFFE", - (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x9FFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+9FFFF", - (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x9FFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+AFFFE", - (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xAFFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+AFFFF", - (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xAFFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+BFFFE", - (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xBFFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+BFFFF", - (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xBFFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+CFFFE", - (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xCFFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+CFFFF", - (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xCFFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+DFFFE", - (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xDFFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+DFFFF", - (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xDFFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+EFFFE", - (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xEFFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+EFFFF", - (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xEFFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+FFFFE", - (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xFFFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+FFFFF", - (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0xFFFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+10FFFE", - (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x10FFFE, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "non-character code point U+10FFFF", - (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"), - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, $UTF8_GOT_NONCHAR, - 'nonchar', 0x10FFFF, - (isASCII) ? 4 : 5, - (isASCII) ? 4 : 5, - qr/Unicode non-character.*is not recommended for open interchange/ - ], - [ "requires at least 32 bits", - (isASCII) - ? "\xfe\x82\x80\x80\x80\x80\x80" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), - # This code point is chosen so that it is representable in a UV on - # 32-bit machines - $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x80000000, - (isASCII) ? 7 : $max_bytes, - (isASCII) ? 1 : 8, - nonportable_regex(0x80000000) - ], - [ "highest 32 bit code point", - (isASCII) - ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), - $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0xFFFFFFFF, - (isASCII) ? 7 : $max_bytes, - (isASCII) ? 1 : 8, - nonportable_regex(0xffffffff) - ], - [ "requires at least 32 bits, and use SUPER-type flags, instead of" - . " ABOVE_31_BIT", - (isASCII) - ? "\xfe\x82\x80\x80\x80\x80\x80" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), - $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, $UTF8_GOT_SUPER, - 'utf8', 0x80000000, - (isASCII) ? 7 : $max_bytes, - 1, - nonportable_regex(0x80000000) - ], - [ "overflow with warnings/disallow for more than 31 bits", - # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT - # with overflow. The overflow malformation is never allowed, so - # preventing it takes precedence if the ABOVE_31_BIT options would - # otherwise allow in an overflowing value. The ASCII code points (1 - # for 32-bits; 1 for 64) were chosen because the old overflow - # detection algorithm did not catch them; this means this test also - # checks for that fix. The EBCDIC are arbitrary overflowing ones - # since we have no reports of failures with it. - (($is64bit) - ? ((isASCII) - ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" - : I8_to_native( - "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) - : ((isASCII) - ? "\xfe\x86\x80\x80\x80\x80\x80" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), - $UTF8_WARN_ABOVE_31_BIT, - $UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0, - (! isASCII || $is64bit) ? $max_bytes : 7, - (isASCII || $is64bit) ? 2 : 8, - qr/overflows/ - ], -); - -if (! $is64bit) { - if (isASCII) { - no warnings qw{portable overflow}; - push @tests, - [ "Lowest 33 bit code point: overflow", - "\xFE\x84\x80\x80\x80\x80\x80", - $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x100000000, - 7, 1, - qr/and( is)? not portable/ - ]; - } -} -else { - no warnings qw{portable overflow}; - push @tests, - [ "More than 32 bits", - (isASCII) - ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" - : I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x1000000000, - $max_bytes, (isASCII) ? 1 : 7, - qr/and( is)? not portable/ - ]; - if (! isASCII) { - push @tests, # These could falsely show wrongly in a naive - # implementation - [ "requires at least 32 bits", - I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x800000000, - $max_bytes, 7, - nonportable_regex(0x80000000) - ], - [ "requires at least 32 bits", - I8_to_native( - "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x10000000000, - $max_bytes, 6, - nonportable_regex(0x10000000000) - ], - [ "requires at least 32 bits", - I8_to_native( - "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x200000000000, - $max_bytes, 5, - nonportable_regex(0x20000000000) - ], - [ "requires at least 32 bits", - I8_to_native( - "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x4000000000000, - $max_bytes, 4, - nonportable_regex(0x4000000000000) - ], - [ "requires at least 32 bits", - I8_to_native( - "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x80000000000000, - $max_bytes, 3, - nonportable_regex(0x80000000000000) - ], - [ "requires at least 32 bits", - I8_to_native( - "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, - $UTF8_GOT_ABOVE_31_BIT, - 'utf8', 0x1000000000000000, - $max_bytes, 2, - nonportable_regex(0x1000000000000000) - ]; - } -} - -foreach my $test (@tests) { - my ($testname, $bytes, $warn_flags, $disallow_flags, $expected_error_flags, - $category, $allowed_uv, $expected_len, $needed_to_discern_len, $message - ) = @$test; - - my $length = length $bytes; - my $will_overflow = $testname =~ /overflow/ ? 'overflow' : ""; - - { - use warnings; - undef @warnings; - my $ret = test_isUTF8_CHAR($bytes, $length); - my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0); - if ($will_overflow) { - is($ret, 0, "isUTF8_CHAR() $testname: returns 0"); - is($ret_flags, 0, "isUTF8_CHAR_flags() $testname: returns 0"); - } - else { - is($ret, $length, - "isUTF8_CHAR() $testname: returns expected length: $length"); - is($ret_flags, $length, "isUTF8_CHAR_flags(...,0) $testname:" - . " returns expected length: $length"); - } - is(scalar @warnings, 0, - "isUTF8_CHAR() and isUTF8_CHAR()_flags $testname: generated" - . " no warnings") - or output_warnings(@warnings); - - undef @warnings; - $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); - if ($will_overflow) { - is($ret, 0, "isSTRICT_UTF8_CHAR() $testname: returns 0"); - } - else { - my $expected_ret = ( $testname =~ /surrogate|non-character/ - || $allowed_uv > 0x10FFFF) - ? 0 - : $length; - is($ret, $expected_ret, "isSTRICT_UTF8_CHAR() $testname: returns" - . " expected length: $expected_ret"); - $ret = test_isUTF8_CHAR_flags($bytes, $length, - $UTF8_DISALLOW_ILLEGAL_INTERCHANGE); - is($ret, $expected_ret, - "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')" - . " acts like isSTRICT_UTF8_CHAR"); - } - is(scalar @warnings, 0, - "isSTRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" - . " generated no warnings") - or output_warnings(@warnings); - - undef @warnings; - $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); - if ($will_overflow) { - is($ret, 0, "isC9_STRICT_UTF8_CHAR() $testname: returns 0"); - } - else { - my $expected_ret = ( $testname =~ /surrogate/ - || $allowed_uv > 0x10FFFF) - ? 0 - : $length; - is($ret, $expected_ret, "isC9_STRICT_UTF8_CHAR() $testname:" - ." returns expected length: $expected_ret"); - $ret = test_isUTF8_CHAR_flags($bytes, $length, - $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); - is($ret, $expected_ret, - "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')" - . " acts like isC9_STRICT_UTF8_CHAR"); - } - is(scalar @warnings, 0, - "isC9_STRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" - . " generated no warnings") - or output_warnings(@warnings); - - # Test partial character handling, for each byte not a full character - for my $j (1.. $length - 1) { - - # Skip the test for the interaction between overflow and above-31 - # bit. It is really testing other things than the partial - # character tests, for which other tests in this file are - # sufficient - last if $testname =~ /overflow/; - - foreach my $disallow_flag (0, $disallow_flags) { - my $partial = substr($bytes, 0, $j); - my $ret_should_be; - my $comment; - if ($disallow_flag) { - $ret_should_be = 0; - $comment = "disallowed"; - if ($j < $needed_to_discern_len) { - $ret_should_be = 1; - $comment .= ", but need $needed_to_discern_len bytes" - . " to discern:"; - } - } - else { - $ret_should_be = 1; - $comment = "allowed"; - } - - undef @warnings; - - $ret = test_is_utf8_valid_partial_char_flags($partial, $j, - $disallow_flag); - is($ret, $ret_should_be, - "$testname: is_utf8_valid_partial_char_flags(" - . display_bytes($partial) - . "), $comment: returns $ret_should_be"); - is(scalar @warnings, 0, - "$testname: is_utf8_valid_partial_char_flags()" - . " generated no warnings") - or output_warnings(@warnings); - } - } - } - - # This is more complicated than the malformations tested earlier, as there - # are several orthogonal variables involved. We test all the subclasses - # of utf8 warnings to verify they work with and without the utf8 class, - # and don't have effects on other sublass warnings - foreach my $warning ('utf8', 'surrogate', 'nonchar', 'non_unicode') { - foreach my $warn_flag (0, $warn_flags) { - foreach my $disallow_flag (0, $disallow_flags) { - foreach my $do_warning (0, 1) { - - # We try each of the above with various combinations of - # malformations that can occur on the same input sequence. - foreach my $short ("", "short") { - foreach my $unexpected_noncont ("", - "unexpected non-continuation") - { - foreach my $overlong ("", "overlong") { - - # If we're already at the longest possible, we - # can't create an overlong (which would be longer) - # can't handle anything larger. - next if $overlong && $expected_len >= $max_bytes; - - my @malformations; - my @expected_errors; - push @malformations, $short if $short; - push @malformations, $unexpected_noncont - if $unexpected_noncont; - push @malformations, $overlong if $overlong; - - # The overflow malformation test in the input - # array is coerced into being treated like one of - # the others. - if ($will_overflow) { - push @malformations, 'overflow'; - push @expected_errors, $UTF8_GOT_OVERFLOW; - } - - my $malformations_name = join "/", @malformations; - $malformations_name .= " malformation" - if $malformations_name; - $malformations_name .= "s" if @malformations > 1; - my $this_bytes = $bytes; - my $this_length = $length; - my $expected_uv = $allowed_uv; - my $this_expected_len = $expected_len; - my $this_needed_to_discern_len = $needed_to_discern_len; - if ($malformations_name) { - $expected_uv = 0; - - # Coerce the input into the desired - # malformation - if ($malformations_name =~ /overlong/) { - - # For an overlong, we convert the original - # start byte into a continuation byte with - # the same data bits as originally. ... - substr($this_bytes, 0, 1) - = start_byte_to_cont(substr($this_bytes, - 0, 1)); - - # ... Then we prepend it with a known - # overlong sequence. This should evaluate - # to the exact same code point as the - # original. - $this_bytes - = I8_to_native("\xff") - . (I8_to_native(chr $first_continuation) - x ( $max_bytes - 1 - length($this_bytes))) - . $this_bytes; - $this_length = length($this_bytes); - $this_needed_to_discern_len - = $max_bytes - ($this_expected_len - - $this_needed_to_discern_len); - $this_expected_len = $max_bytes; - push @expected_errors, $UTF8_GOT_LONG; - } - if ($malformations_name =~ /short/) { - - # Just tell the test to not look far - # enough into the input. - $this_length--; - $this_expected_len--; - push @expected_errors, $UTF8_GOT_SHORT; - } - if ($malformations_name - =~ /non-continuation/) - { - # Change the final continuation byte into - # a non one. - my $pos = ($short) ? -2 : -1; - substr($this_bytes, $pos, 1) = '?'; - $this_expected_len--; - push @expected_errors, - $UTF8_GOT_NON_CONTINUATION; - } - } - - my $eval_warn = $do_warning - ? "use warnings '$warning'" - : $warning eq "utf8" - ? "no warnings 'utf8'" - : ( "use warnings 'utf8';" - . " no warnings '$warning'"); - - # Is effectively disallowed if we've set up a - # malformation, even if the flag indicates it is - # allowed. Fix up test name to indicate this as - # well - my $disallowed = $disallow_flag - || $malformations_name; - my $this_name = "utf8n_to_uvchr_error() $testname: " - . (($disallow_flag) - ? 'disallowed' - : $disallowed - ? $disallowed - : 'allowed'); - $this_name .= ", $eval_warn"; - $this_name .= ", " . (($warn_flag) - ? 'with warning flag' - : 'no warning flag'); - - undef @warnings; - my $ret_ref; - my $display_bytes = display_bytes($this_bytes); - my $call = " Call was: $eval_warn; \$ret_ref" - . " = test_utf8n_to_uvchr_error(" - . "'$display_bytes', $this_length," - . "$warn_flag" - . "|$disallow_flag)"; - my $eval_text = "$eval_warn; \$ret_ref" - . " = test_utf8n_to_uvchr_error(" - . "'$this_bytes'," - . " $this_length, $warn_flag" - . "|$disallow_flag)"; - eval "$eval_text"; - if (! ok ("$@ eq ''", - "$this_name: eval succeeded")) - { - diag "\$!='$!'; eval'd=\"$call\""; - next; - } - if ($disallowed) { - is($ret_ref->[0], 0, "$this_name: Returns 0") - or diag $call; - } - else { - is($ret_ref->[0], $expected_uv, - "$this_name: Returns expected uv: " - . sprintf("0x%04X", $expected_uv)) - or diag $call; - } - is($ret_ref->[1], $this_expected_len, - "$this_name: Returns expected length:" - . " $this_expected_len") - or diag $call; - - my $errors = $ret_ref->[2]; - - for (my $i = @expected_errors - 1; $i >= 0; $i--) { - if (ok($expected_errors[$i] & $errors, - "Expected and got error bit return" - . " for $malformations[$i] malformation")) - { - $errors &= ~$expected_errors[$i]; - } - splice @expected_errors, $i, 1; - } - is(scalar @expected_errors, 0, - "Got all the expected malformation errors") - or diag Dumper \@expected_errors; - - if ( $this_expected_len >= $this_needed_to_discern_len - && ($warn_flag || $disallow_flag)) - { - is($errors, $expected_error_flags, - "Got the correct error flag") - or diag $call; - } - else { - is($errors, 0, "Got no other error flag"); - } - - if (@malformations) { - if (! $do_warning && $warning eq 'utf8') { - goto no_warnings_expected; - } - - # Check that each malformation generates a - # warning, removing that warning if found - MALFORMATION: - foreach my $malformation (@malformations) { - foreach (my $i = 0; $i < @warnings; $i++) { - if ($warnings[$i] =~ /$malformation/) { - pass("Expected and got" - . "'$malformation' warning"); - splice @warnings, $i, 1; - next MALFORMATION; - } - } - fail("Expected '$malformation' warning" - . " but didn't get it"); - - } - } - - # Any overflow will override any super or above-31 - # warnings. - goto no_warnings_expected - if $will_overflow || $this_expected_len - < $this_needed_to_discern_len; - - if ( ! $do_warning - && ( $warning eq 'utf8' - || $warning eq $category)) - { - goto no_warnings_expected; - } - elsif ($warn_flag) { - if (is(scalar @warnings, 1, - "$this_name: Got a single warning ")) - { - like($warnings[0], $message, - "$this_name: Got expected warning") - or diag $call; - } - else { - diag $call; - if (scalar @warnings) { - output_warnings(@warnings); - } - } - } - else { - no_warnings_expected: - unless (is(scalar @warnings, 0, - "$this_name: Got no warnings")) - { - diag $call; - output_warnings(@warnings); - } - } - - # Check CHECK_ONLY results when the input is - # disallowed. Do this when actually disallowed, - # not just when the $disallow_flag is set - if ($disallowed) { - undef @warnings; - $ret_ref = test_utf8n_to_uvchr_error( - $this_bytes, $this_length, - $disallow_flag|$UTF8_CHECK_ONLY); - is($ret_ref->[0], 0, - "$this_name, CHECK_ONLY: Returns 0") - or diag $call; - is($ret_ref->[1], -1, - "$this_name: CHECK_ONLY: returns -1 for length") - or diag $call; - if (! is(scalar @warnings, 0, - "$this_name, CHECK_ONLY: no warnings" - . " generated")) - { - diag $call; - output_warnings(@warnings); - } - } - - # Now repeat some of the above, but for - # uvchr_to_utf8_flags(). Since this comes from an - # existing code point, it hasn't overflowed, and - # isn't malformed. - next if @malformations; - - # The warning and disallow flags passed in are for - # utf8n_to_uvchr_error(). Convert them for - # uvchr_to_utf8_flags(). - my $uvchr_warn_flag = 0; - my $uvchr_disallow_flag = 0; - if ($warn_flag) { - if ($warn_flag == $UTF8_WARN_SURROGATE) { - $uvchr_warn_flag = $UNICODE_WARN_SURROGATE - } - elsif ($warn_flag == $UTF8_WARN_NONCHAR) { - $uvchr_warn_flag = $UNICODE_WARN_NONCHAR - } - elsif ($warn_flag == $UTF8_WARN_SUPER) { - $uvchr_warn_flag = $UNICODE_WARN_SUPER - } - elsif ($warn_flag == $UTF8_WARN_ABOVE_31_BIT) { - $uvchr_warn_flag - = $UNICODE_WARN_ABOVE_31_BIT; - } - else { - fail(sprintf "Unexpected warn flag: %x", - $warn_flag); - next; - } - } - if ($disallow_flag) { - if ($disallow_flag == $UTF8_DISALLOW_SURROGATE) - { - $uvchr_disallow_flag - = $UNICODE_DISALLOW_SURROGATE; - } - elsif ($disallow_flag == $UTF8_DISALLOW_NONCHAR) - { - $uvchr_disallow_flag - = $UNICODE_DISALLOW_NONCHAR; - } - elsif ($disallow_flag == $UTF8_DISALLOW_SUPER) { - $uvchr_disallow_flag - = $UNICODE_DISALLOW_SUPER; - } - elsif ($disallow_flag - == $UTF8_DISALLOW_ABOVE_31_BIT) - { - $uvchr_disallow_flag = - $UNICODE_DISALLOW_ABOVE_31_BIT; - } - else { - fail(sprintf "Unexpected disallow flag: %x", - $disallow_flag); - next; - } - } - - $disallowed = $uvchr_disallow_flag; - - $this_name = "uvchr_to_utf8_flags() $testname: " - . (($uvchr_disallow_flag) - ? 'disallowed' - : ($disallowed) - ? 'ABOVE_31_BIT allowed' - : 'allowed'); - $this_name .= ", $eval_warn"; - $this_name .= ", " . (($uvchr_warn_flag) - ? 'with warning flag' - : 'no warning flag'); - - undef @warnings; - my $ret; - my $warn_flag = sprintf "0x%x", $uvchr_warn_flag; - my $disallow_flag = sprintf "0x%x", - $uvchr_disallow_flag; - $call = sprintf(" Call was: $eval_warn; \$ret" - . " = test_uvchr_to_utf8_flags(" - . " 0x%x, $warn_flag|$disallow_flag)", - $allowed_uv); - $eval_text = "$eval_warn; \$ret =" - . " test_uvchr_to_utf8_flags(" - . "$allowed_uv, $warn_flag|" - . "$disallow_flag)"; - eval "$eval_text"; - if (! ok ("$@ eq ''", "$this_name: eval succeeded")) - { - diag "\$!='$!'; eval'd=\"$eval_text\""; - next; - } - if ($disallowed) { - is($ret, undef, "$this_name: Returns undef") - or diag $call; - } - else { - is($ret, $bytes, "$this_name: Returns expected string") - or diag $call; - } - if (! $do_warning - && ($warning eq 'utf8' || $warning eq $category)) - { - if (!is(scalar @warnings, 0, - "$this_name: No warnings generated")) - { - diag $call; - output_warnings(@warnings); - } - } - elsif ( $uvchr_warn_flag - && ( $warning eq 'utf8' - || $warning eq $category)) - { - if (is(scalar @warnings, 1, - "$this_name: Got a single warning ")) - { - like($warnings[0], $message, - "$this_name: Got expected warning") - or diag $call; - } - else { - diag $call; - output_warnings(@warnings) - if scalar @warnings; - } - } - } - } - } - } - } - } - } -} - SKIP: { isASCII diff --git a/ext/XS-APItest/t/utf8_malformed.t b/ext/XS-APItest/t/utf8_malformed.t new file mode 100644 index 0000000..16c5b7f --- /dev/null +++ b/ext/XS-APItest/t/utf8_malformed.t @@ -0,0 +1,418 @@ +#!perl -w + +# Test handling of various UTF-8 malformations + +use strict; +use Test::More; + +BEGIN { + use_ok('XS::APItest'); + require 'charset_tools.pl'; + require './t/utf8_setup.pl'; +}; + +$|=1; + +no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit + # machines, and that is tested elsewhere + +use XS::APItest; + +my @warnings; + +use warnings 'utf8'; +local $SIG{__WARN__} = sub { push @warnings, @_ }; + +my $I8c = $::I8c; + +my $REPLACEMENT = 0xFFFD; + +# Now test the malformations. All these raise category utf8 warnings. +my @malformations = ( + # ($testname, $bytes, $length, $allow_flags, $expected_error_flags, + # $allowed_uv, $expected_len, $needed_to_discern_len, $message ) + +# Now considered a program bug, and asserted against + #[ "zero length string malformation", "", 0, + # $::UTF8_ALLOW_EMPTY, $::UTF8_GOT_EMPTY, $REPLACEMENT, 0, 0, + # qr/empty string/ + #], + [ "orphan continuation byte malformation", I8_to_native("${I8c}a"), 2, + $::UTF8_ALLOW_CONTINUATION, $::UTF8_GOT_CONTINUATION, $REPLACEMENT, + 1, 1, + qr/unexpected continuation byte/ + ], + [ "premature next character malformation (immediate)", + (isASCII) ? "\xc2\xc2\x80" : I8_to_native("\xc5\xc5\xa0"), + 3, + $::UTF8_ALLOW_NON_CONTINUATION, $::UTF8_GOT_NON_CONTINUATION, $REPLACEMENT, + 1, 2, + qr/unexpected non-continuation byte.*immediately after start byte/ + ], + [ "premature next character malformation (non-immediate)", + I8_to_native("\xef${I8c}a"), 3, + $::UTF8_ALLOW_NON_CONTINUATION, $::UTF8_GOT_NON_CONTINUATION, $REPLACEMENT, + 2, 3, + qr/unexpected non-continuation byte .* 2 bytes after start byte/ + ], + [ "too short malformation", I8_to_native("\xf1${I8c}a"), 2, + # Having the 'a' after this, but saying there are only 2 bytes also + # tests that we pay attention to the passed in length + $::UTF8_ALLOW_SHORT, $::UTF8_GOT_SHORT, $REPLACEMENT, + 2, 2, + qr/2 bytes available, need 4/ + ], + [ "overlong malformation, lowest 2-byte", + (isASCII) ? "\xc0\x80" : I8_to_native("\xc0\xa0"), + 2, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + 0, # NUL + 2, 1, + qr/overlong/ + ], + [ "overlong malformation, highest 2-byte", + (isASCII) ? "\xc1\xbf" : I8_to_native("\xc4\xbf"), + 2, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + (isASCII) ? 0x7F : utf8::unicode_to_native(0x9F), + 2, 1, + qr/overlong/ + ], + [ "overlong malformation, lowest 3-byte", + (isASCII) ? "\xe0\x80\x80" : I8_to_native("\xe0\xa0\xa0"), + 3, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + 0, # NUL + 3, (isASCII) ? 2 : 1, + qr/overlong/ + ], + [ "overlong malformation, highest 3-byte", + (isASCII) ? "\xe0\x9f\xbf" : I8_to_native("\xe0\xbf\xbf"), + 3, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + (isASCII) ? 0x7FF : 0x3FF, + 3, (isASCII) ? 2 : 1, + qr/overlong/ + ], + [ "overlong malformation, lowest 4-byte", + (isASCII) ? "\xf0\x80\x80\x80" : I8_to_native("\xf0\xa0\xa0\xa0"), + 4, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + 0, # NUL + 4, 2, + qr/overlong/ + ], + [ "overlong malformation, highest 4-byte", + (isASCII) ? "\xf0\x8F\xbf\xbf" : I8_to_native("\xf0\xaf\xbf\xbf"), + 4, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + (isASCII) ? 0xFFFF : 0x3FFF, + 4, 2, + qr/overlong/ + ], + [ "overlong malformation, lowest 5-byte", + (isASCII) + ? "\xf8\x80\x80\x80\x80" + : I8_to_native("\xf8\xa0\xa0\xa0\xa0"), + 5, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + 0, # NUL + 5, 2, + qr/overlong/ + ], + [ "overlong malformation, highest 5-byte", + (isASCII) + ? "\xf8\x87\xbf\xbf\xbf" + : I8_to_native("\xf8\xa7\xbf\xbf\xbf"), + 5, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + (isASCII) ? 0x1FFFFF : 0x3FFFF, + 5, 2, + qr/overlong/ + ], + [ "overlong malformation, lowest 6-byte", + (isASCII) + ? "\xfc\x80\x80\x80\x80\x80" + : I8_to_native("\xfc\xa0\xa0\xa0\xa0\xa0"), + 6, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + 0, # NUL + 6, 2, + qr/overlong/ + ], + [ "overlong malformation, highest 6-byte", + (isASCII) + ? "\xfc\x83\xbf\xbf\xbf\xbf" + : I8_to_native("\xfc\xa3\xbf\xbf\xbf\xbf"), + 6, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + (isASCII) ? 0x3FFFFFF : 0x3FFFFF, + 6, 2, + qr/overlong/ + ], + [ "overlong malformation, lowest 7-byte", + (isASCII) + ? "\xfe\x80\x80\x80\x80\x80\x80" + : I8_to_native("\xfe\xa0\xa0\xa0\xa0\xa0\xa0"), + 7, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + 0, # NUL + 7, 2, + qr/overlong/ + ], + [ "overlong malformation, highest 7-byte", + (isASCII) + ? "\xfe\x81\xbf\xbf\xbf\xbf\xbf" + : I8_to_native("\xfe\xa1\xbf\xbf\xbf\xbf\xbf"), + 7, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + (isASCII) ? 0x7FFFFFFF : 0x3FFFFFF, + 7, 2, + qr/overlong/ + ], +); + +if (isASCII && ! $::is64bit) { # 32-bit ASCII platform + no warnings 'portable'; + push @malformations, + [ "overflow malformation", + "\xfe\x84\x80\x80\x80\x80\x80", # Represents 2**32 + 7, + $::UTF8_ALLOW_OVERFLOW, $::UTF8_GOT_OVERFLOW, + $REPLACEMENT, + 7, 2, + qr/overflows/ + ], + [ "overflow malformation", + "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80", + $::max_bytes, + $::UTF8_ALLOW_OVERFLOW, $::UTF8_GOT_OVERFLOW, + $REPLACEMENT, + $::max_bytes, 1, + qr/overflows/ + ]; +} +else { # 64-bit ASCII, or EBCDIC of any size. + # On EBCDIC platforms, another overlong test is needed even on 32-bit + # systems, whereas it doesn't happen on ASCII except on 64-bit ones. + + no warnings 'portable'; + no warnings 'overflow'; # Doesn't run on 32-bit systems, but compiles + push @malformations, + [ "overlong malformation, lowest max-byte", + (isASCII) + ? "\xff\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $::max_bytes, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + 0, # NUL + $::max_bytes, (isASCII) ? 7 : 8, + qr/overlong/, + ], + [ "overlong malformation, highest max-byte", + (isASCII) # 2**36-1 on ASCII; 2**30-1 on EBCDIC + ? "\xff\x80\x80\x80\x80\x80\x80\xbf\xbf\xbf\xbf\xbf\xbf" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xbf\xbf\xbf\xbf\xbf\xbf"), + $::max_bytes, + $::UTF8_ALLOW_LONG, $::UTF8_GOT_LONG, + (isASCII) ? 0xFFFFFFFFF : 0x3FFFFFFF, + $::max_bytes, (isASCII) ? 7 : 8, + qr/overlong/, + ]; + + if (! $::is64bit) { # 32-bit EBCDIC + push @malformations, + [ "overflow malformation", + I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"), + $::max_bytes, + $::UTF8_ALLOW_OVERFLOW, $::UTF8_GOT_OVERFLOW, + $REPLACEMENT, + $::max_bytes, 8, + qr/overflows/ + ]; + } + else { # 64-bit, either ASCII or EBCDIC + push @malformations, + [ "overflow malformation", + (isASCII) + ? "\xff\x80\x90\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0" + : I8_to_native( + "\xff\xb0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $::max_bytes, + $::UTF8_ALLOW_OVERFLOW, $::UTF8_GOT_OVERFLOW, + $REPLACEMENT, + $::max_bytes, (isASCII) ? 3 : 2, + qr/overflows/ + ]; + } +} + +# For each overlong malformation in the list, we modify it, so that there are +# two tests. The first one returns the replacement character given the input +# flags, and the second test adds a flag that causes the actual code point the +# malformation represents to be returned. +my @added_overlongs; +foreach my $test (@malformations) { + my ($testname, $bytes, $length, $allow_flags, $expected_error_flags, + $allowed_uv, $expected_len, $needed_to_discern_len, $message ) = @$test; + next unless $testname =~ /overlong/; + + $test->[0] .= "; use REPLACEMENT CHAR"; + $test->[5] = $REPLACEMENT; + + push @added_overlongs, + [ $testname . "; use actual value", + $bytes, $length, + $allow_flags | $::UTF8_ALLOW_LONG_AND_ITS_VALUE, + $expected_error_flags, $allowed_uv, $expected_len, + $needed_to_discern_len, $message + ]; +} +push @malformations, @added_overlongs; + +foreach my $test (@malformations) { + my ($testname, $bytes, $length, $allow_flags, $expected_error_flags, + $allowed_uv, $expected_len, $needed_to_discern_len, $message ) = @$test; + + if (length($bytes) < $length) { + fail("Internal test error: actual buffer length (" . length($bytes) + . ") must be at least as high as how far we are allowed to read" + . " into it ($length)"); + diag($testname); + next; + } + + undef @warnings; + + my $ret = test_isUTF8_CHAR($bytes, $length); + is($ret, 0, "$testname: isUTF8_CHAR returns 0"); + is(scalar @warnings, 0, "$testname: isUTF8_CHAR() generated no warnings") + or output_warnings(@warnings); + + undef @warnings; + + $ret = test_isUTF8_CHAR_flags($bytes, $length, 0); + is($ret, 0, "$testname: isUTF8_CHAR_flags returns 0"); + is(scalar @warnings, 0, "$testname: isUTF8_CHAR_flags() generated no" + . " warnings") + or output_warnings(@warnings); + + $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); + is($ret, 0, "$testname: isSTRICT_UTF8_CHAR returns 0"); + is(scalar @warnings, 0, + "$testname: isSTRICT_UTF8_CHAR() generated no warnings") + or output_warnings(@warnings); + + $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); + is($ret, 0, "$testname: isC9_STRICT_UTF8_CHAR returns 0"); + is(scalar @warnings, 0, + "$testname: isC9_STRICT_UTF8_CHAR() generated no warnings") + or output_warnings(@warnings); + + for my $j (1 .. $length - 1) { + my $partial = substr($bytes, 0, $j); + + undef @warnings; + + $ret = test_is_utf8_valid_partial_char_flags($bytes, $j, 0); + + my $ret_should_be = 0; + my $comment = ""; + if ($j < $needed_to_discern_len) { + $ret_should_be = 1; + $comment = ", but need $needed_to_discern_len bytes to discern:"; + } + + is($ret, $ret_should_be, "$testname: is_utf8_valid_partial_char_flags(" + . display_bytes($partial) + . ")$comment returns $ret_should_be"); + is(scalar @warnings, 0, + "$testname: is_utf8_valid_partial_char_flags() generated" + . " no warnings") + or output_warnings(@warnings); + } + + + # Test what happens when this malformation is not allowed + undef @warnings; + my $ret_ref = test_utf8n_to_uvchr_error($bytes, $length, 0); + is($ret_ref->[0], 0, "$testname: disallowed: Returns 0"); + is($ret_ref->[1], $expected_len, + "$testname: utf8n_to_uvchr_error(), disallowed: Returns expected" + . " length: $expected_len"); + if (is(scalar @warnings, 1, + "$testname: disallowed: Got a single warning ")) + { + like($warnings[0], $message, + "$testname: disallowed: Got expected warning"); + } + else { + if (scalar @warnings) { + output_warnings(@warnings); + } + } + is($ret_ref->[2], $expected_error_flags, + "$testname: utf8n_to_uvchr_error(), disallowed:" + . " Returns expected error"); + + { # Next test when disallowed, and warnings are off. + undef @warnings; + no warnings 'utf8'; + my $ret_ref = test_utf8n_to_uvchr_error($bytes, $length, 0); + is($ret_ref->[0], 0, + "$testname: utf8n_to_uvchr_error(), disallowed: no warnings 'utf8':" + . " Returns 0"); + is($ret_ref->[1], $expected_len, + "$testname: utf8n_to_uvchr_error(), disallowed: no warnings 'utf8':" + . " Returns expected length: $expected_len"); + if (!is(scalar @warnings, 0, + "$testname: utf8n_to_uvchr_error(), disallowed: no warnings 'utf8':" + . " no warnings generated")) + { + output_warnings(@warnings); + } + is($ret_ref->[2], $expected_error_flags, + "$testname: utf8n_to_uvchr_error(), disallowed: Returns" + . " expected error"); + } + + # Test with CHECK_ONLY + undef @warnings; + $ret_ref = test_utf8n_to_uvchr_error($bytes, $length, $::UTF8_CHECK_ONLY); + is($ret_ref->[0], 0, "$testname: CHECK_ONLY: Returns 0"); + is($ret_ref->[1], -1, "$testname: CHECK_ONLY: returns -1 for length"); + if (! is(scalar @warnings, 0, + "$testname: CHECK_ONLY: no warnings generated")) + { + output_warnings(@warnings); + } + is($ret_ref->[2], $expected_error_flags, + "$testname: utf8n_to_uvchr_error(), disallowed: Returns expected" + . " error"); + + next if $allow_flags == 0; # Skip if can't allow this malformation + + # Test when the malformation is allowed + undef @warnings; + $ret_ref = test_utf8n_to_uvchr_error($bytes, $length, $allow_flags); + is($ret_ref->[0], $allowed_uv, + "$testname: utf8n_to_uvchr_error(), allowed: Returns expected uv: " + . sprintf("0x%04X", $allowed_uv)); + is($ret_ref->[1], $expected_len, + "$testname: utf8n_to_uvchr_error(), allowed: Returns expected length:" + . " $expected_len"); + if (!is(scalar @warnings, 0, + "$testname: utf8n_to_uvchr_error(), allowed: no warnings" + . " generated")) + { + output_warnings(@warnings); + } + is($ret_ref->[2], $expected_error_flags, + "$testname: utf8n_to_uvchr_error(), disallowed: Returns" + . " expected error"); +} + +done_testing; diff --git a/ext/XS-APItest/t/utf8_setup.pl b/ext/XS-APItest/t/utf8_setup.pl new file mode 100644 index 0000000..0943900 --- /dev/null +++ b/ext/XS-APItest/t/utf8_setup.pl @@ -0,0 +1,98 @@ +# Common subroutines and constants, called by .t files in this directory that +# deal with UTF-8 + +# The test files can't use byte_utf8a_to_utf8n() from t/charset_tools.pl +# because that uses the same functions we are testing here. So UTF-EBCDIC +# strings are hard-coded as I8 strings in this file instead, and we use the +# translation functions to/from I8 from that file instead. + +sub isASCII { ord "A" == 65 } + +sub display_bytes { + use bytes; + my $string = shift; + return '"' + . join("", map { sprintf("\\x%02x", ord $_) } split "", $string) + . '"'; +} + +sub output_warnings(@) { + diag "The warnings were:\n" . join("", @_); +} + +sub start_byte_to_cont($) { + + # Extract the code point information from the input UTF-8 start byte, and + # return a continuation byte containing the same information. This is + # used in constructing an overlong malformation from valid input. + + my $byte = shift; + my $len = test_UTF8_SKIP($byte); + if ($len < 2) { + die "start_byte_to_cont() is expecting a UTF-8 variant"; + } + + $byte = ord native_to_I8($byte); + + # Copied from utf8.h. This gets rid of the leading 1 bits. + $byte &= ((($len) >= 7) ? 0x00 : (0x1F >> (($len)-2))); + + $byte |= (isASCII) ? 0x80 : 0xA0; + return I8_to_native(chr $byte); +} + +$::is64bit = length sprintf("%x", ~0) > 8; + +$::first_continuation = (isASCII) ? 0x80 : 0xA0; + +$::I8c = (isASCII) ? "\x80" : "\xa0"; # A continuation byte + + +$::max_bytes = (isASCII) ? 13 : 14; # Max number of bytes in a UTF-8 sequence + # representing a single code point + +# Copied from utf8.h +$::UTF8_ALLOW_EMPTY = 0x0001; +$::UTF8_GOT_EMPTY = $UTF8_ALLOW_EMPTY; +$::UTF8_ALLOW_CONTINUATION = 0x0002; +$::UTF8_GOT_CONTINUATION = $UTF8_ALLOW_CONTINUATION; +$::UTF8_ALLOW_NON_CONTINUATION = 0x0004; +$::UTF8_GOT_NON_CONTINUATION = $UTF8_ALLOW_NON_CONTINUATION; +$::UTF8_ALLOW_SHORT = 0x0008; +$::UTF8_GOT_SHORT = $UTF8_ALLOW_SHORT; +$::UTF8_ALLOW_LONG = 0x0010; +$::UTF8_ALLOW_LONG_AND_ITS_VALUE = $UTF8_ALLOW_LONG|0x0020; +$::UTF8_GOT_LONG = $UTF8_ALLOW_LONG; +$::UTF8_ALLOW_OVERFLOW = 0x0080; +$::UTF8_GOT_OVERFLOW = $UTF8_ALLOW_OVERFLOW; +$::UTF8_DISALLOW_SURROGATE = 0x0100; +$::UTF8_GOT_SURROGATE = $UTF8_DISALLOW_SURROGATE; +$::UTF8_WARN_SURROGATE = 0x0200; +$::UTF8_DISALLOW_NONCHAR = 0x0400; +$::UTF8_GOT_NONCHAR = $UTF8_DISALLOW_NONCHAR; +$::UTF8_WARN_NONCHAR = 0x0800; +$::UTF8_DISALLOW_SUPER = 0x1000; +$::UTF8_GOT_SUPER = $UTF8_DISALLOW_SUPER; +$::UTF8_WARN_SUPER = 0x2000; +$::UTF8_DISALLOW_ABOVE_31_BIT = 0x4000; +$::UTF8_GOT_ABOVE_31_BIT = $UTF8_DISALLOW_ABOVE_31_BIT; +$::UTF8_WARN_ABOVE_31_BIT = 0x8000; +$::UTF8_CHECK_ONLY = 0x10000; +$::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE + = $UTF8_DISALLOW_SUPER|$UTF8_DISALLOW_SURROGATE; +$::UTF8_DISALLOW_ILLEGAL_INTERCHANGE + = $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE|$UTF8_DISALLOW_NONCHAR; +$::UTF8_WARN_ILLEGAL_C9_INTERCHANGE + = $UTF8_WARN_SUPER|$UTF8_WARN_SURROGATE; +$::UTF8_WARN_ILLEGAL_INTERCHANGE + = $UTF8_WARN_ILLEGAL_C9_INTERCHANGE|$UTF8_WARN_NONCHAR; + +# Test uvchr_to_utf8(). +$::UNICODE_WARN_SURROGATE = 0x0001; +$::UNICODE_WARN_NONCHAR = 0x0002; +$::UNICODE_WARN_SUPER = 0x0004; +$::UNICODE_WARN_ABOVE_31_BIT = 0x0008; +$::UNICODE_DISALLOW_SURROGATE = 0x0010; +$::UNICODE_DISALLOW_NONCHAR = 0x0020; +$::UNICODE_DISALLOW_SUPER = 0x0040; +$::UNICODE_DISALLOW_ABOVE_31_BIT = 0x0080; diff --git a/ext/XS-APItest/t/utf8_warn0.t b/ext/XS-APItest/t/utf8_warn0.t new file mode 100644 index 0000000..3f91bf5 --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn0.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 0; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn1.t b/ext/XS-APItest/t/utf8_warn1.t new file mode 100644 index 0000000..beb4faf --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn1.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 1; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn2.t b/ext/XS-APItest/t/utf8_warn2.t new file mode 100644 index 0000000..d6d3e7a --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn2.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 2; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn3.t b/ext/XS-APItest/t/utf8_warn3.t new file mode 100644 index 0000000..ae0da88 --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn3.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 3; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn4.t b/ext/XS-APItest/t/utf8_warn4.t new file mode 100644 index 0000000..52e8250 --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn4.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 4; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn5.t b/ext/XS-APItest/t/utf8_warn5.t new file mode 100644 index 0000000..94f8f0c --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn5.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 5; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn6.t b/ext/XS-APItest/t/utf8_warn6.t new file mode 100644 index 0000000..5995db6 --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn6.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 6; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn7.t b/ext/XS-APItest/t/utf8_warn7.t new file mode 100644 index 0000000..27dc96b --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn7.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 7; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn8.t b/ext/XS-APItest/t/utf8_warn8.t new file mode 100644 index 0000000..01de3b6 --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn8.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 8; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn9.t b/ext/XS-APItest/t/utf8_warn9.t new file mode 100644 index 0000000..aa4c42f --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn9.t @@ -0,0 +1,7 @@ +use strict; +use warnings; +no warnings 'once'; + +$::TEST_CHUNK = 9; + +do './t/utf8_warn_base.pl'; diff --git a/ext/XS-APItest/t/utf8_warn_base.pl b/ext/XS-APItest/t/utf8_warn_base.pl new file mode 100644 index 0000000..66f6f3d --- /dev/null +++ b/ext/XS-APItest/t/utf8_warn_base.pl @@ -0,0 +1,1059 @@ +#!perl -w + +# This is a base file to be used by various .t's in its directory + +use strict; +use Test::More; + +BEGIN { + use_ok('XS::APItest'); + require 'charset_tools.pl'; + require './t/utf8_setup.pl'; +}; + +$|=1; + +no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit + # machines, and that is tested elsewhere + +use XS::APItest; +use Data::Dumper; + +my @warnings; + +use warnings 'utf8'; +local $SIG{__WARN__} = sub { push @warnings, @_ }; + +sub nonportable_regex ($) { + + # Returns a pattern that matches the non-portable message raised either + # for the specific input code point, or the one generated when there + # is some malformation that precludes the message containing the specific + # code point + + my $code_point = shift; + + my $string = sprintf '(Code point 0x%X is not Unicode, and' + . '|Any UTF-8 sequence that starts with' + . ' "(\\\x[[:xdigit:]]{2})+" is for a' + . ' non-Unicode code point, and is) not portable', + $code_point; + return qr/$string/; +} + +# Now test the cases where a legal code point is generated, but may or may not +# be allowed/warned on. +my @tests = ( + # ($testname, $bytes, $warn_flags, $disallow_flags, $expected_error_flags, + # $category, $allowed_uv, $expected_len, $needed_to_discern_len, $message ) + [ "lowest surrogate", + (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"), + $::UTF8_WARN_SURROGATE, $::UTF8_DISALLOW_SURROGATE, $::UTF8_GOT_SURROGATE, + 'surrogate', 0xD800, + (isASCII) ? 3 : 4, + 2, + qr/surrogate/ + ], + [ "a middle surrogate", + (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"), + $::UTF8_WARN_SURROGATE, $::UTF8_DISALLOW_SURROGATE, $::UTF8_GOT_SURROGATE, + 'surrogate', 0xD90D, + (isASCII) ? 3 : 4, + 2, + qr/surrogate/ + ], + [ "highest surrogate", + (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"), + $::UTF8_WARN_SURROGATE, $::UTF8_DISALLOW_SURROGATE, $::UTF8_GOT_SURROGATE, + 'surrogate', 0xDFFF, + (isASCII) ? 3 : 4, + 2, + qr/surrogate/ + ], + [ "first non_unicode", + (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), + $::UTF8_WARN_SUPER, $::UTF8_DISALLOW_SUPER, $::UTF8_GOT_SUPER, + 'non_unicode', 0x110000, + (isASCII) ? 4 : 5, + 2, + qr/(not Unicode|for a non-Unicode code point).* may not be portable/ + ], + [ "non_unicode whose first byte tells that", + (isASCII) ? "\xf5\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_SUPER, $::UTF8_DISALLOW_SUPER, $::UTF8_GOT_SUPER, + 'non_unicode', + (isASCII) ? 0x140000 : 0x200000, + (isASCII) ? 4 : 5, + 1, + qr/(not Unicode|for a non-Unicode code point).* may not be portable/ + ], + [ "first of 32 consecutive non-character code points", + (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xFDD0, + (isASCII) ? 3 : 4, + (isASCII) ? 3 : 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "a mid non-character code point of the 32 consecutive ones", + (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xFDE0, + (isASCII) ? 3 : 4, + (isASCII) ? 3 : 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "final of 32 consecutive non-character code points", + (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xFDEF, + (isASCII) ? 3 : 4, + (isASCII) ? 3 : 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+FFFE", + (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xFFFE, + (isASCII) ? 3 : 4, + (isASCII) ? 3 : 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+FFFF", + (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xFFFF, + (isASCII) ? 3 : 4, + (isASCII) ? 3 : 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+1FFFE", + (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x1FFFE, + 4, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+1FFFF", + (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x1FFFF, + 4, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+2FFFE", + (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x2FFFE, + 4, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+2FFFF", + (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x2FFFF, + 4, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+3FFFE", + (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x3FFFE, + 4, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+3FFFF", + (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x3FFFF, + 4, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+4FFFE", + (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x4FFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+4FFFF", + (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x4FFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+5FFFE", + (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x5FFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+5FFFF", + (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x5FFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+6FFFE", + (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x6FFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+6FFFF", + (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x6FFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+7FFFE", + (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x7FFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+7FFFF", + (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x7FFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+8FFFE", + (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x8FFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+8FFFF", + (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x8FFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+9FFFE", + (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x9FFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+9FFFF", + (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x9FFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+AFFFE", + (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xAFFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+AFFFF", + (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xAFFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+BFFFE", + (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xBFFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+BFFFF", + (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xBFFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+CFFFE", + (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xCFFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+CFFFF", + (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xCFFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+DFFFE", + (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xDFFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+DFFFF", + (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xDFFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+EFFFE", + (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xEFFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+EFFFF", + (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xEFFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+FFFFE", + (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xFFFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+FFFFF", + (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0xFFFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+10FFFE", + (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x10FFFE, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+10FFFF", + (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"), + $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, + 'nonchar', 0x10FFFF, + (isASCII) ? 4 : 5, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "requires at least 32 bits", + (isASCII) + ? "\xfe\x82\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), + # This code point is chosen so that it is representable in a UV on + # 32-bit machines + $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x80000000, + (isASCII) ? 7 : $::max_bytes, + (isASCII) ? 1 : 8, + nonportable_regex(0x80000000) + ], + [ "highest 32 bit code point", + (isASCII) + ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), + $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0xFFFFFFFF, + (isASCII) ? 7 : $::max_bytes, + (isASCII) ? 1 : 8, + nonportable_regex(0xffffffff) + ], + [ "requires at least 32 bits, and use SUPER-type flags, instead of" + . " ABOVE_31_BIT", + (isASCII) + ? "\xfe\x82\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_SUPER, $::UTF8_DISALLOW_SUPER, $::UTF8_GOT_SUPER, + 'utf8', 0x80000000, + (isASCII) ? 7 : $::max_bytes, + 1, + nonportable_regex(0x80000000) + ], + [ "overflow with warnings/disallow for more than 31 bits", + # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT + # with overflow. The overflow malformation is never allowed, so + # preventing it takes precedence if the ABOVE_31_BIT options would + # otherwise allow in an overflowing value. The ASCII code points (1 + # for 32-bits; 1 for 64) were chosen because the old overflow + # detection algorithm did not catch them; this means this test also + # checks for that fix. The EBCDIC are arbitrary overflowing ones + # since we have no reports of failures with it. + (($::is64bit) + ? ((isASCII) + ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" + : I8_to_native( + "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) + : ((isASCII) + ? "\xfe\x86\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), + $::UTF8_WARN_ABOVE_31_BIT, + $::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0, + (! isASCII || $::is64bit) ? $::max_bytes : 7, + (isASCII || $::is64bit) ? 2 : 8, + qr/overflows/ + ], +); + +if (! $::is64bit) { + if (isASCII) { + no warnings qw{portable overflow}; + push @tests, + [ "Lowest 33 bit code point: overflow", + "\xFE\x84\x80\x80\x80\x80\x80", + $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x100000000, + 7, 1, + qr/and( is)? not portable/ + ]; + } +} +else { + no warnings qw{portable overflow}; + push @tests, + [ "More than 32 bits", + (isASCII) + ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x1000000000, + $::max_bytes, (isASCII) ? 1 : 7, + qr/and( is)? not portable/ + ]; + if (! isASCII) { + push @tests, # These could falsely show wrongly in a naive + # implementation + [ "requires at least 32 bits", + I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x800000000, + $::max_bytes, 7, + nonportable_regex(0x80000000) + ], + [ "requires at least 32 bits", + I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x10000000000, + $::max_bytes, 6, + nonportable_regex(0x10000000000) + ], + [ "requires at least 32 bits", + I8_to_native( + "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x200000000000, + $::max_bytes, 5, + nonportable_regex(0x20000000000) + ], + [ "requires at least 32 bits", + I8_to_native( + "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x4000000000000, + $::max_bytes, 4, + nonportable_regex(0x4000000000000) + ], + [ "requires at least 32 bits", + I8_to_native( + "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x80000000000000, + $::max_bytes, 3, + nonportable_regex(0x80000000000000) + ], + [ "requires at least 32 bits", + I8_to_native( + "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, + $::UTF8_GOT_ABOVE_31_BIT, + 'utf8', 0x1000000000000000, + $::max_bytes, 2, + nonportable_regex(0x1000000000000000) + ]; + } +} + +# This test is split into this number of files. +my $num_test_files = $ENV{TEST_JOBS} || 1; +$num_test_files = 10 if $num_test_files > 10; + +my $test_count = -1; +foreach my $test (@tests) { + $test_count++; + next if $test_count % $num_test_files != $::TEST_CHUNK; + + my ($testname, $bytes, $warn_flags, $disallow_flags, $expected_error_flags, + $category, $allowed_uv, $expected_len, $needed_to_discern_len, $message + ) = @$test; + + my $length = length $bytes; + my $will_overflow = $testname =~ /overflow/ ? 'overflow' : ""; + + { + use warnings; + undef @warnings; + my $ret = test_isUTF8_CHAR($bytes, $length); + my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0); + if ($will_overflow) { + is($ret, 0, "isUTF8_CHAR() $testname: returns 0"); + is($ret_flags, 0, "isUTF8_CHAR_flags() $testname: returns 0"); + } + else { + is($ret, $length, + "isUTF8_CHAR() $testname: returns expected length: $length"); + is($ret_flags, $length, "isUTF8_CHAR_flags(...,0) $testname:" + . " returns expected length: $length"); + } + is(scalar @warnings, 0, + "isUTF8_CHAR() and isUTF8_CHAR()_flags $testname: generated" + . " no warnings") + or output_warnings(@warnings); + + undef @warnings; + $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); + if ($will_overflow) { + is($ret, 0, "isSTRICT_UTF8_CHAR() $testname: returns 0"); + } + else { + my $expected_ret = ( $testname =~ /surrogate|non-character/ + || $allowed_uv > 0x10FFFF) + ? 0 + : $length; + is($ret, $expected_ret, "isSTRICT_UTF8_CHAR() $testname: returns" + . " expected length: $expected_ret"); + $ret = test_isUTF8_CHAR_flags($bytes, $length, + $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE); + is($ret, $expected_ret, + "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')" + . " acts like isSTRICT_UTF8_CHAR"); + } + is(scalar @warnings, 0, + "isSTRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" + . " generated no warnings") + or output_warnings(@warnings); + + undef @warnings; + $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); + if ($will_overflow) { + is($ret, 0, "isC9_STRICT_UTF8_CHAR() $testname: returns 0"); + } + else { + my $expected_ret = ( $testname =~ /surrogate/ + || $allowed_uv > 0x10FFFF) + ? 0 + : $length; + is($ret, $expected_ret, "isC9_STRICT_UTF8_CHAR() $testname:" + ." returns expected length: $expected_ret"); + $ret = test_isUTF8_CHAR_flags($bytes, $length, + $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); + is($ret, $expected_ret, + "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')" + . " acts like isC9_STRICT_UTF8_CHAR"); + } + is(scalar @warnings, 0, + "isC9_STRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" + . " generated no warnings") + or output_warnings(@warnings); + + # Test partial character handling, for each byte not a full character + for my $j (1.. $length - 1) { + + # Skip the test for the interaction between overflow and above-31 + # bit. It is really testing other things than the partial + # character tests, for which other tests in this file are + # sufficient + last if $testname =~ /overflow/; + + foreach my $disallow_flag (0, $disallow_flags) { + my $partial = substr($bytes, 0, $j); + my $ret_should_be; + my $comment; + if ($disallow_flag) { + $ret_should_be = 0; + $comment = "disallowed"; + if ($j < $needed_to_discern_len) { + $ret_should_be = 1; + $comment .= ", but need $needed_to_discern_len bytes" + . " to discern:"; + } + } + else { + $ret_should_be = 1; + $comment = "allowed"; + } + + undef @warnings; + + $ret = test_is_utf8_valid_partial_char_flags($partial, $j, + $disallow_flag); + is($ret, $ret_should_be, + "$testname: is_utf8_valid_partial_char_flags(" + . display_bytes($partial) + . "), $comment: returns $ret_should_be"); + is(scalar @warnings, 0, + "$testname: is_utf8_valid_partial_char_flags()" + . " generated no warnings") + or output_warnings(@warnings); + } + } + } + + # This is more complicated than the malformations tested earlier, as there + # are several orthogonal variables involved. We test all the subclasses + # of utf8 warnings to verify they work with and without the utf8 class, + # and don't have effects on other sublass warnings + foreach my $warning ('utf8', 'surrogate', 'nonchar', 'non_unicode') { + foreach my $warn_flag (0, $warn_flags) { + foreach my $disallow_flag (0, $disallow_flags) { + foreach my $do_warning (0, 1) { + + # We try each of the above with various combinations of + # malformations that can occur on the same input sequence. + foreach my $short ("", "short") { + foreach my $unexpected_noncont ("", + "unexpected non-continuation") + { + foreach my $overlong ("", "overlong") { + + # If we're already at the longest possible, we + # can't create an overlong (which would be longer) + # can't handle anything larger. + next if $overlong && $expected_len >= $::max_bytes; + + my @malformations; + my @expected_errors; + push @malformations, $short if $short; + push @malformations, $unexpected_noncont + if $unexpected_noncont; + push @malformations, $overlong if $overlong; + + # The overflow malformation test in the input + # array is coerced into being treated like one of + # the others. + if ($will_overflow) { + push @malformations, 'overflow'; + push @expected_errors, $::UTF8_GOT_OVERFLOW; + } + + my $malformations_name = join "/", @malformations; + $malformations_name .= " malformation" + if $malformations_name; + $malformations_name .= "s" if @malformations > 1; + my $this_bytes = $bytes; + my $this_length = $length; + my $expected_uv = $allowed_uv; + my $this_expected_len = $expected_len; + my $this_needed_to_discern_len = $needed_to_discern_len; + if ($malformations_name) { + $expected_uv = 0; + + # Coerce the input into the desired + # malformation + if ($malformations_name =~ /overlong/) { + + # For an overlong, we convert the original + # start byte into a continuation byte with + # the same data bits as originally. ... + substr($this_bytes, 0, 1) + = start_byte_to_cont(substr($this_bytes, + 0, 1)); + + # ... Then we prepend it with a known + # overlong sequence. This should evaluate + # to the exact same code point as the + # original. + $this_bytes + = I8_to_native("\xff") + . (I8_to_native(chr $::first_continuation) + x ( $::max_bytes - 1 - length($this_bytes))) + . $this_bytes; + $this_length = length($this_bytes); + $this_needed_to_discern_len + = $::max_bytes - ($this_expected_len + - $this_needed_to_discern_len); + $this_expected_len = $::max_bytes; + push @expected_errors, $::UTF8_GOT_LONG; + } + if ($malformations_name =~ /short/) { + + # Just tell the test to not look far + # enough into the input. + $this_length--; + $this_expected_len--; + push @expected_errors, $::UTF8_GOT_SHORT; + } + if ($malformations_name + =~ /non-continuation/) + { + # Change the final continuation byte into + # a non one. + my $pos = ($short) ? -2 : -1; + substr($this_bytes, $pos, 1) = '?'; + $this_expected_len--; + push @expected_errors, + $::UTF8_GOT_NON_CONTINUATION; + } + } + + my $eval_warn = $do_warning + ? "use warnings '$warning'" + : $warning eq "utf8" + ? "no warnings 'utf8'" + : ( "use warnings 'utf8';" + . " no warnings '$warning'"); + + # Is effectively disallowed if we've set up a + # malformation, even if the flag indicates it is + # allowed. Fix up test name to indicate this as + # well + my $disallowed = $disallow_flag + || $malformations_name; + my $this_name = "utf8n_to_uvchr_error() $testname: " + . (($disallow_flag) + ? 'disallowed' + : $disallowed + ? $disallowed + : 'allowed'); + $this_name .= ", $eval_warn"; + $this_name .= ", " . (($warn_flag) + ? 'with warning flag' + : 'no warning flag'); + + undef @warnings; + my $ret_ref; + my $display_bytes = display_bytes($this_bytes); + my $call = " Call was: $eval_warn; \$ret_ref" + . " = test_utf8n_to_uvchr_error(" + . "'$display_bytes', $this_length," + . "$warn_flag" + . "|$disallow_flag)"; + my $eval_text = "$eval_warn; \$ret_ref" + . " = test_utf8n_to_uvchr_error(" + . "'$this_bytes'," + . " $this_length, $warn_flag" + . "|$disallow_flag)"; + eval "$eval_text"; + if (! ok ("$@ eq ''", + "$this_name: eval succeeded")) + { + diag "\$!='$!'; eval'd=\"$call\""; + next; + } + if ($disallowed) { + is($ret_ref->[0], 0, "$this_name: Returns 0") + or diag $call; + } + else { + is($ret_ref->[0], $expected_uv, + "$this_name: Returns expected uv: " + . sprintf("0x%04X", $expected_uv)) + or diag $call; + } + is($ret_ref->[1], $this_expected_len, + "$this_name: Returns expected length:" + . " $this_expected_len") + or diag $call; + + my $errors = $ret_ref->[2]; + + for (my $i = @expected_errors - 1; $i >= 0; $i--) { + if (ok($expected_errors[$i] & $errors, + "Expected and got error bit return" + . " for $malformations[$i] malformation")) + { + $errors &= ~$expected_errors[$i]; + } + splice @expected_errors, $i, 1; + } + is(scalar @expected_errors, 0, + "Got all the expected malformation errors") + or diag Dumper \@expected_errors; + + if ( $this_expected_len >= $this_needed_to_discern_len + && ($warn_flag || $disallow_flag)) + { + is($errors, $expected_error_flags, + "Got the correct error flag") + or diag $call; + } + else { + is($errors, 0, "Got no other error flag"); + } + + if (@malformations) { + if (! $do_warning && $warning eq 'utf8') { + goto no_warnings_expected; + } + + # Check that each malformation generates a + # warning, removing that warning if found + MALFORMATION: + foreach my $malformation (@malformations) { + foreach (my $i = 0; $i < @warnings; $i++) { + if ($warnings[$i] =~ /$malformation/) { + pass("Expected and got" + . "'$malformation' warning"); + splice @warnings, $i, 1; + next MALFORMATION; + } + } + fail("Expected '$malformation' warning" + . " but didn't get it"); + + } + } + + # Any overflow will override any super or above-31 + # warnings. + goto no_warnings_expected + if $will_overflow || $this_expected_len + < $this_needed_to_discern_len; + + if ( ! $do_warning + && ( $warning eq 'utf8' + || $warning eq $category)) + { + goto no_warnings_expected; + } + elsif ($warn_flag) { + if (is(scalar @warnings, 1, + "$this_name: Got a single warning ")) + { + like($warnings[0], $message, + "$this_name: Got expected warning") + or diag $call; + } + else { + diag $call; + if (scalar @warnings) { + output_warnings(@warnings); + } + } + } + else { + no_warnings_expected: + unless (is(scalar @warnings, 0, + "$this_name: Got no warnings")) + { + diag $call; + output_warnings(@warnings); + } + } + + # Check CHECK_ONLY results when the input is + # disallowed. Do this when actually disallowed, + # not just when the $disallow_flag is set + if ($disallowed) { + undef @warnings; + $ret_ref = test_utf8n_to_uvchr_error( + $this_bytes, $this_length, + $disallow_flag|$::UTF8_CHECK_ONLY); + is($ret_ref->[0], 0, + "$this_name, CHECK_ONLY: Returns 0") + or diag $call; + is($ret_ref->[1], -1, + "$this_name: CHECK_ONLY: returns -1 for length") + or diag $call; + if (! is(scalar @warnings, 0, + "$this_name, CHECK_ONLY: no warnings" + . " generated")) + { + diag $call; + output_warnings(@warnings); + } + } + + # Now repeat some of the above, but for + # uvchr_to_utf8_flags(). Since this comes from an + # existing code point, it hasn't overflowed, and + # isn't malformed. + next if @malformations; + + # The warning and disallow flags passed in are for + # utf8n_to_uvchr_error(). Convert them for + # uvchr_to_utf8_flags(). + my $uvchr_warn_flag = 0; + my $uvchr_disallow_flag = 0; + if ($warn_flag) { + if ($warn_flag == $::UTF8_WARN_SURROGATE) { + $uvchr_warn_flag = $::UNICODE_WARN_SURROGATE + } + elsif ($warn_flag == $::UTF8_WARN_NONCHAR) { + $uvchr_warn_flag = $::UNICODE_WARN_NONCHAR + } + elsif ($warn_flag == $::UTF8_WARN_SUPER) { + $uvchr_warn_flag = $::UNICODE_WARN_SUPER + } + elsif ($warn_flag == $::UTF8_WARN_ABOVE_31_BIT) { + $uvchr_warn_flag + = $::UNICODE_WARN_ABOVE_31_BIT; + } + else { + fail(sprintf "Unexpected warn flag: %x", + $warn_flag); + next; + } + } + if ($disallow_flag) { + if ($disallow_flag == $::UTF8_DISALLOW_SURROGATE) + { + $uvchr_disallow_flag + = $::UNICODE_DISALLOW_SURROGATE; + } + elsif ($disallow_flag == $::UTF8_DISALLOW_NONCHAR) + { + $uvchr_disallow_flag + = $::UNICODE_DISALLOW_NONCHAR; + } + elsif ($disallow_flag == $::UTF8_DISALLOW_SUPER) { + $uvchr_disallow_flag + = $::UNICODE_DISALLOW_SUPER; + } + elsif ($disallow_flag + == $::UTF8_DISALLOW_ABOVE_31_BIT) + { + $uvchr_disallow_flag = + $::UNICODE_DISALLOW_ABOVE_31_BIT; + } + else { + fail(sprintf "Unexpected disallow flag: %x", + $disallow_flag); + next; + } + } + + $disallowed = $uvchr_disallow_flag; + + $this_name = "uvchr_to_utf8_flags() $testname: " + . (($uvchr_disallow_flag) + ? 'disallowed' + : ($disallowed) + ? 'ABOVE_31_BIT allowed' + : 'allowed'); + $this_name .= ", $eval_warn"; + $this_name .= ", " . (($uvchr_warn_flag) + ? 'with warning flag' + : 'no warning flag'); + + undef @warnings; + my $ret; + my $warn_flag = sprintf "0x%x", $uvchr_warn_flag; + my $disallow_flag = sprintf "0x%x", + $uvchr_disallow_flag; + $call = sprintf(" Call was: $eval_warn; \$ret" + . " = test_uvchr_to_utf8_flags(" + . " 0x%x, $warn_flag|$disallow_flag)", + $allowed_uv); + $eval_text = "$eval_warn; \$ret =" + . " test_uvchr_to_utf8_flags(" + . "$allowed_uv, $warn_flag|" + . "$disallow_flag)"; + eval "$eval_text"; + if (! ok ("$@ eq ''", "$this_name: eval succeeded")) + { + diag "\$!='$!'; eval'd=\"$eval_text\""; + next; + } + if ($disallowed) { + is($ret, undef, "$this_name: Returns undef") + or diag $call; + } + else { + is($ret, $bytes, "$this_name: Returns expected string") + or diag $call; + } + if (! $do_warning + && ($warning eq 'utf8' || $warning eq $category)) + { + if (!is(scalar @warnings, 0, + "$this_name: No warnings generated")) + { + diag $call; + output_warnings(@warnings); + } + } + elsif ( $uvchr_warn_flag + && ( $warning eq 'utf8' + || $warning eq $category)) + { + if (is(scalar @warnings, 1, + "$this_name: Got a single warning ")) + { + like($warnings[0], $message, + "$this_name: Got expected warning") + or diag $call; + } + else { + diag $call; + output_warnings(@warnings) + if scalar @warnings; + } + } + } + } + } + } + } + } + } +} + +done_testing; diff --git a/gv.c b/gv.c index fff8e95..8c85614 100644 --- a/gv.c +++ b/gv.c @@ -736,7 +736,7 @@ S_gv_fetchmeth_internal(pTHX_ HV* stash, SV* meth, const char* name, STRLEN len, /* check locally for a real method or a cache entry */ he = (HE*)hv_common( - cachestash, meth, name, len, (flags & SVf_UTF8) ? HVhek_UTF8 : 0, create, NULL, 0 + cachestash, meth, name, len, is_utf8 ? HVhek_UTF8 : 0, create, NULL, 0 ); if (he) gvp = (GV**)&HeVAL(he); else gvp = NULL; @@ -1663,8 +1663,15 @@ S_parse_gv_stash_name(pTHX_ HV **stash, GV **gv, const char **name, name_cursor++; *name = name_cursor+1; if (*name == name_end) { - if (!*gv) - *gv = MUTABLE_GV(*hv_fetchs(PL_defstash, "main::", TRUE)); + if (!*gv) { + *gv = MUTABLE_GV(*hv_fetchs(PL_defstash, "main::", TRUE)); + if (SvTYPE(*gv) != SVt_PVGV) { + gv_init_pvn(*gv, PL_defstash, "main::", 6, + GV_ADDMULTI); + GvHV(*gv) = + MUTABLE_HV(SvREFCNT_inc_simple(PL_defstash)); + } + } return TRUE; } } diff --git a/handy.h b/handy.h index 4d2f4bc..80f9cf4 100644 --- a/handy.h +++ b/handy.h @@ -485,8 +485,13 @@ Returns zero if non-equal, or non-zero if equal. #define strnNE(s1,s2,l) (strncmp(s1,s2,l)) #define strnEQ(s1,s2,l) (!strncmp(s1,s2,l)) +/* These names are controversial, so guarding against their being used in more + * places than they already are. strBEGs and StrStartsWith are potential + * candidates */ +#if defined(PERL_IN_DOIO_C) || defined(PERL_IN_GV_C) || defined(PERL_IN_HV_C) || defined(PERL_IN_LOCALE_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_TOKE_C) || defined(PERL_EXT) #define strNEs(s1,s2) (strncmp(s1,"" s2 "", sizeof(s2)-1)) #define strEQs(s1,s2) (!strncmp(s1,"" s2 "", sizeof(s2)-1)) +#endif #ifdef HAS_MEMCMP # define memNE(s1,s2,l) (memcmp(s1,s2,l)) diff --git a/hints/catamount.sh b/hints/catamount.sh index 0c075fd..35b96af 100644 --- a/hints/catamount.sh +++ b/hints/catamount.sh @@ -31,11 +31,11 @@ # mkdir -p /opt/perl-catamount # mkdir -p /opt/perl-catamount/include # mkdir -p /opt/perl-catamount/lib -# mkdir -p /opt/perl-catamount/lib/perl5/5.25.9 +# mkdir -p /opt/perl-catamount/lib/perl5/5.25.10 # mkdir -p /opt/perl-catamount/bin # cp *.h /opt/perl-catamount/include # cp libperl.a /opt/perl-catamount/lib -# cp -pr lib/* /opt/perl-catamount/lib/perl5/5.25.9 +# cp -pr lib/* /opt/perl-catamount/lib/perl5/5.25.10 # cp miniperl perl run.sh cc.sh /opt/perl-catamount/lib # # With the headers and the libperl.a you can embed Perl to your Catamount diff --git a/hv.c b/hv.c index 7239892..efeadb7 100644 --- a/hv.c +++ b/hv.c @@ -390,7 +390,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, flags = is_utf8 ? HVhek_UTF8 : 0; } } else { - is_utf8 = ((flags & HVhek_UTF8) ? TRUE : FALSE); + is_utf8 = cBOOL(flags & HVhek_UTF8); } if (action & HV_DELETE) { @@ -990,7 +990,7 @@ Perl_hv_bucket_ratio(pTHX_ HV *hv) } sv = sv_newmortal(); - if (HvUSEDKEYS((const HV *)hv)) + if (HvUSEDKEYS((HV *)hv)) Perl_sv_setpvf(aTHX_ sv, "%ld/%ld", (long)HvFILL(hv), (long)HvMAX(hv) + 1); else @@ -1029,7 +1029,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, HE *entry; HE **oentry; HE **first_entry; - bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE; + bool is_utf8 = cBOOL(k_flags & HVhek_UTF8); int masked_flags; HEK *keysv_hek = NULL; U8 mro_changes = 0; /* 1 = isa; 2 = package moved */ diff --git a/hv.h b/hv.h index 3a46ea3..8411b5d 100644 --- a/hv.h +++ b/hv.h @@ -325,7 +325,7 @@ C. ((SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name && HvAUX(hv)->xhv_name_count != -1) \ ? HEK_UTF8(HvENAME_HEK_NN(hv)) : 0) -/* the number of keys (including any placeholders) */ +/* the number of keys (including any placeholders) - NOT PART OF THE API */ #define XHvTOTALKEYS(xhv) ((xhv)->xhv_keys) /* @@ -461,8 +461,7 @@ C. (val), (hash))) #define hv_exists_ent(hv, keysv, hash) \ - (hv_common((hv), (keysv), NULL, 0, 0, HV_FETCH_ISEXISTS, 0, (hash)) \ - ? TRUE : FALSE) + cBOOL(hv_common((hv), (keysv), NULL, 0, 0, HV_FETCH_ISEXISTS, 0, (hash))) #define hv_fetch_ent(hv, keysv, lval, hash) \ ((HE *) hv_common((hv), (keysv), NULL, 0, 0, \ ((lval) ? HV_FETCH_LVALUE : 0), NULL, (hash))) @@ -483,8 +482,7 @@ C. #define hv_exists(hv, key, klen) \ - (hv_common_key_len((hv), (key), (klen), HV_FETCH_ISEXISTS, NULL, 0) \ - ? TRUE : FALSE) + cBOOL(hv_common_key_len((hv), (key), (klen), HV_FETCH_ISEXISTS, NULL, 0)) #define hv_fetch(hv, key, klen, lval) \ ((SV**) hv_common_key_len((hv), (key), (klen), (lval) \ diff --git a/inline.h b/inline.h index acd19e5..f7bd4a3 100644 --- a/inline.h +++ b/inline.h @@ -1645,6 +1645,92 @@ S_cx_popgiven(pTHX_ PERL_CONTEXT *cx) SvREFCNT_dec(sv); } +/* ------------------ util.h ------------------------------------------- */ + +/* +=head1 Miscellaneous Functions + +=for apidoc foldEQ + +Returns true if the leading C bytes of the strings C and C are the +same +case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes +match themselves and their opposite case counterparts. Non-cased and non-ASCII +range bytes match only themselves. + +=cut +*/ + +PERL_STATIC_INLINE I32 +Perl_foldEQ(const char *s1, const char *s2, I32 len) +{ + const U8 *a = (const U8 *)s1; + const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ; + + assert(len >= 0); + + while (len--) { + if (*a != *b && *a != PL_fold[*b]) + return 0; + a++,b++; + } + return 1; +} + +I32 +Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len) +{ + /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on + * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor + * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor + * does it check that the strings each have at least 'len' characters */ + + const U8 *a = (const U8 *)s1; + const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ_LATIN1; + + assert(len >= 0); + + while (len--) { + if (*a != *b && *a != PL_fold_latin1[*b]) { + return 0; + } + a++, b++; + } + return 1; +} + +/* +=for apidoc foldEQ_locale + +Returns true if the leading C bytes of the strings C and C are the +same case-insensitively in the current locale; false otherwise. + +=cut +*/ + +I32 +Perl_foldEQ_locale(const char *s1, const char *s2, I32 len) +{ + dVAR; + const U8 *a = (const U8 *)s1; + const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ_LOCALE; + + assert(len >= 0); + + while (len--) { + if (*a != *b && *a != PL_fold_locale[*b]) + return 0; + a++,b++; + } + return 1; +} + /* * ex: set ts=8 sts=4 sw=4 et: */ diff --git a/lib/B/Op_private.pm b/lib/B/Op_private.pm index cd4f15b..59c8408 100644 --- a/lib/B/Op_private.pm +++ b/lib/B/Op_private.pm @@ -118,7 +118,7 @@ package B::Op_private; our %bits; -our $VERSION = "5.025009"; +our $VERSION = "5.025010"; $bits{$_}{3} = 'OPpENTERSUB_AMPER' for qw(entersub rv2cv); $bits{$_}{6} = 'OPpENTERSUB_DB' for qw(entersub rv2cv); diff --git a/lib/Getopt/Std.pm b/lib/Getopt/Std.pm index b7f8132..5b8878d 100644 --- a/lib/Getopt/Std.pm +++ b/lib/Getopt/Std.pm @@ -4,7 +4,7 @@ require Exporter; =head1 NAME -getopt, getopts - Process single-character switches with switch clustering +Getopt::Std - Process single-character switches with switch clustering =head1 SYNOPSIS @@ -81,7 +81,7 @@ and version_mess() with the switches string as an argument. @ISA = qw(Exporter); @EXPORT = qw(getopt getopts); -$VERSION = '1.11'; +$VERSION = '1.12'; # uncomment the next line to disable 1.03-backward compatibility paranoia # $STANDARD_HELP_VERSION = 1; diff --git a/lib/PerlIO.pm b/lib/PerlIO.pm index 2e27f98..7658ce4 100644 --- a/lib/PerlIO.pm +++ b/lib/PerlIO.pm @@ -1,6 +1,6 @@ package PerlIO; -our $VERSION = '1.09'; +our $VERSION = '1.10'; # Map layer name to package that defines it our %alias; @@ -104,7 +104,7 @@ is chosen to render simple text parts (i.e. non-accented letters, digits and common punctuation) human readable in the encoded file. (B: This layer does not validate byte sequences. For reading input, -you should instead use C<:encoding(utf8)> instead of bare C<:utf8>.) +you should instead use C<:encoding(UTF-8)> instead of bare C<:utf8>.) Here is how to write your native data out using UTF-8 (or UTF-EBCDIC) and then read it back in. diff --git a/lib/feature.pm b/lib/feature.pm index fe5c513..ed13273 100644 --- a/lib/feature.pm +++ b/lib/feature.pm @@ -5,7 +5,7 @@ package feature; -our $VERSION = '1.46'; +our $VERSION = '1.47'; our %feature = ( fc => 'feature_fc', @@ -180,50 +180,22 @@ operator|perlop/Range Operators>. =head2 The 'unicode_eval' and 'evalbytes' features -Under the C feature, Perl's C function, when passed a -string, will evaluate it as a string of characters, ignoring any -C declarations. C exists to declare the encoding of -the script, which only makes sense for a stream of bytes, not a string of -characters. Source filters are forbidden, as they also really only make -sense on strings of bytes. Any attempt to activate a source filter will -result in an error. - -The C feature enables the C keyword, which evaluates -the argument passed to it as a string of bytes. It dies if the string -contains any characters outside the 8-bit range. Source filters work -within C: they apply to the contents of the string being -evaluated. - -Together, these two features are intended to replace the historical C -function, which has (at least) two bugs in it, that cannot easily be fixed -without breaking existing programs: - -=over - -=item * - -C behaves differently depending on the internal encoding of the -string, sometimes treating its argument as a string of bytes, and sometimes -as a string of characters. - -=item * - -Source filters activated within C leak out into whichever I -scope is currently being compiled. To give an example with the CPAN module -L: - - BEGIN { eval "use Semi::Semicolons; # not filtered here " } - # filtered here! - -C fixes that to work the way one would expect: - - use feature "evalbytes"; - BEGIN { evalbytes "use Semi::Semicolons; # filtered " } - # not filtered - -=back - -These two features are available starting with Perl 5.16. +Together, these two features are intended to replace the legacy string +C function, which behaves problematically in some instances. They are +available starting with Perl 5.16, and are enabled by default by a +S> or higher declaration. + +C changes the behavior of plain string C to work more +consistently, especially in the Unicode world. Certain (mis)behaviors +couldn't be changed without breaking some things that had come to rely on +them, so the feature can be enabled and disabled. Details are at +L. + +C is like string C, but operating on a byte stream that is +not UTF-8 encoded. Details are at L. Without a +S> nor a S> (or higher) declaration in +the current scope, you can still access it by instead writing +C. =head2 The 'current_sub' feature diff --git a/lib/open.pm b/lib/open.pm index fd22e1b..ca3cf7b 100644 --- a/lib/open.pm +++ b/lib/open.pm @@ -1,7 +1,7 @@ package open; use warnings; -our $VERSION = '1.10'; +our $VERSION = '1.11'; require 5.008001; # for PerlIO::get_layers() @@ -153,7 +153,7 @@ open - perl pragma to set default PerlIO layers for input and output use open IO => ':locale'; - use open ':encoding(utf8)'; + use open ':encoding(UTF-8)'; use open ':locale'; use open ':encoding(iso-8859-7)'; @@ -195,8 +195,8 @@ For example: These are equivalent - use open ':encoding(utf8)'; - use open IO => ':encoding(utf8)'; + use open ':encoding(UTF-8)'; + use open IO => ':encoding(UTF-8)'; as are these @@ -221,8 +221,8 @@ The C<:std> subpragma on its own has no effect, but if combined with the C<:utf8> or C<:encoding> subpragmas, it converts the standard filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected for input/output handles. For example, if both input and out are -chosen to be C<:encoding(utf8)>, a C<:std> will mean that STDIN, STDOUT, -and STDERR are also in C<:encoding(utf8)>. On the other hand, if only +chosen to be C<:encoding(UTF-8)>, a C<:std> will mean that STDIN, STDOUT, +and STDERR are also in C<:encoding(UTF-8)>. On the other hand, if only output is chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the STDOUT and STDERR to be in C. The C<:locale> subpragma implicitly turns on C<:std>. diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 5424617..e3336f5 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -18835,31 +18835,56 @@ EOF_CODE ? "\nsub TODO_FAILING_BREAKS { 1 }\n" : "\nsub TODO_FAILING_BREAKS { 0 }\n"; - - push @output, - (map {"Test_GCB('$_');\n"} @backslash_X_tests), - (map {"Test_LB('$_');\n"} @LB_tests), - (map {"Test_SB('$_');\n"} @SB_tests), - (map {"Test_WB('$_');\n"} @WB_tests); - @output= map { map s/^/ /mgr, map "$_;\n", split /;\n/, $_ } @output; + # Cause there to be 'if' statements to only execute a portion of this + # long-running test each time, so that we can have a bunch of .t's running + # in parallel + my $chunks = 10 # Number of test files + - 1 # For GCB & SB + - 1 # For WB + - 4; # LB split into this many files my @output_chunked; my $chunk_count=0; - my $chunk_size= int(@output/10)+1; + my $chunk_size= int(@output / $chunks) + 1; while (@output) { $chunk_count++; my @chunk= splice @output, 0, $chunk_size; push @output_chunked, - "if (!\$::TESTCHUNK or \$::TESTCHUNK == $chunk_count){\n", - @chunk, + "if (!\$::TESTCHUNK or \$::TESTCHUNK == $chunk_count) {\n", + @chunk, + "}\n"; + } + + $chunk_count++; + push @output_chunked, + "if (!\$::TESTCHUNK or \$::TESTCHUNK == $chunk_count) {\n", + (map {" Test_GCB('$_');\n"} @backslash_X_tests), + (map {" Test_SB('$_');\n"} @SB_tests), + "}\n"; + + + $chunk_size= int(@LB_tests / 4) + 1; + @LB_tests = map {" Test_LB('$_');\n"} @LB_tests; + while (@LB_tests) { + $chunk_count++; + my @chunk= splice @LB_tests, 0, $chunk_size; + push @output_chunked, + "if (!\$::TESTCHUNK or \$::TESTCHUNK == $chunk_count) {\n", + @chunk, "}\n"; } + $chunk_count++; + push @output_chunked, + "if (!\$::TESTCHUNK or \$::TESTCHUNK == $chunk_count) {\n", + (map {" Test_WB('$_');\n"} @WB_tests), + "}\n"; + &write($t_path, 0, # Not utf8; [$HEADER, diff --git a/lib/utf8.t b/lib/utf8.t index 6b28eae..d35110b 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -140,6 +140,9 @@ no utf8; # Ironic, no? = join " . ", map {sprintf 'chr (%d)', ord $_} split //, $char; push @char, [$_, $char, $charsubst, $char_as_ord]; } + my $malformed = $::IS_ASCII + ? "\xE1\xA0" + : I8_to_native("\xE6\xA0"); # Now we've done all the UTF8 munching hopefully we're safe my @tests = ( ['check our detection program works', @@ -162,10 +165,10 @@ no utf8; # Ironic, no? # "out of memory" error. We really need the "" [rather than qq() # or q()] to get the best explosion. ["!Feed malformed utf8 into perl.", <<"BANG", - use utf8; %a = ("\xE1\xA0"=>"sterling"); + use utf8; %a = ("$malformed" =>"sterling"); print 'start'; printf '%x,', ord \$_ foreach keys %a; print "end\n"; BANG - qr/^Malformed UTF-8 character: .*? \(too short; \d bytes? available, need \d\).*start\d+,end$/sm + qr/^Malformed UTF-8 character: .*? \(unexpected non-continuation byte/ ], ); foreach (@tests) { diff --git a/locale.c b/locale.c index 8521ffd..1ba802f 100644 --- a/locale.c +++ b/locale.c @@ -960,9 +960,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #endif #ifdef DEBUGGING - DEBUG_INITIALIZATION_set((PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT")) - ? TRUE - : FALSE); + DEBUG_INITIALIZATION_set(cBOOL(PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT"))); # define DEBUG_LOCALE_INIT(category, locale, result) \ STMT_START { \ if (debug_initialization) { \ @@ -1507,10 +1505,10 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, char * x; /* j's xfrm plus collation index */ STRLEN x_len; /* length of 'x' */ STRLEN trial_len = 1; + char cur_source[] = { '\0', '\0' }; - /* Create a 1 byte string of the current code point */ - char cur_source[] = { (char) j, '\0' }; - + /* Skip non-controls the first time through the loop. The + * controls in a UTF-8 locale are the L1 ones */ if (! try_non_controls && (PL_in_utf8_COLLATE_locale) ? ! isCNTRL_L1(j) : ! isCNTRL_LC(j)) @@ -1518,6 +1516,9 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, continue; } + /* Create a 1-char string of the current code point */ + cur_source[0] = (char) j; + /* Then transform it */ x = _mem_collxfrm(cur_source, trial_len, &x_len, 0 /* The string is not in UTF-8 */); @@ -1673,9 +1674,10 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, for (j = 1; j < 256; j++) { char * x; STRLEN x_len; + char cur_source[] = { '\0', '\0' }; - /* Create a 1-char string of the current code point. */ - char cur_source[] = { (char) j, '\0' }; + /* Create a 1-char string of the current code point */ + cur_source[0] = (char) j; /* Then transform it */ x = _mem_collxfrm(cur_source, 1, &x_len, FALSE); @@ -1906,14 +1908,12 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, #ifdef DEBUGGING if (DEBUG_Lv_TEST || debug_initialization) { - Size_t i; print_collxfrm_input_and_return(s, s + len, xlen, utf8); PerlIO_printf(Perl_debug_log, "Its xfrm is:"); - for (i = COLLXFRM_HDR_LEN; i < *xlen + COLLXFRM_HDR_LEN; i++) { - PerlIO_printf(Perl_debug_log, " %02x", (U8) xbuf[i]); - } - PerlIO_printf(Perl_debug_log, "\n"); + PerlIO_printf(Perl_debug_log, "%s\n", + _byte_dump_string((U8 *) xbuf + COLLXFRM_HDR_LEN, + *xlen, 1)); } #endif diff --git a/mathoms.c b/mathoms.c index 92cd77a..66f2cc3 100644 --- a/mathoms.c +++ b/mathoms.c @@ -865,8 +865,7 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash) { PERL_ARGS_ASSERT_HV_EXISTS_ENT; - return hv_common(hv, keysv, NULL, 0, 0, HV_FETCH_ISEXISTS, 0, hash) - ? TRUE : FALSE; + return cBOOL(hv_common(hv, keysv, NULL, 0, 0, HV_FETCH_ISEXISTS, 0, hash)); } HE * @@ -927,8 +926,7 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen_i32) klen = klen_i32; flags = 0; } - return hv_common(hv, NULL, key, klen, flags, HV_FETCH_ISEXISTS, 0, 0) - ? TRUE : FALSE; + return cBOOL(hv_common(hv, NULL, key, klen, flags, HV_FETCH_ISEXISTS, 0, 0)); } SV** diff --git a/mg.c b/mg.c index 69fdc93..b11f66a 100644 --- a/mg.c +++ b/mg.c @@ -171,6 +171,7 @@ Perl_mg_get(pTHX_ SV *sv) const I32 mgs_ix = SSNEW(sizeof(MGS)); bool saved = FALSE; bool have_new = 0; + bool taint_only = TRUE; /* the only get method seen is taint */ MAGIC *newmg, *head, *cur, *mg; PERL_ARGS_ASSERT_MG_GET; @@ -189,10 +190,13 @@ Perl_mg_get(pTHX_ SV *sv) if (!(mg->mg_flags & MGf_GSKIP) && vtbl && vtbl->svt_get) { /* taint's mg get is so dumb it doesn't need flag saving */ - if (!saved && mg->mg_type != PERL_MAGIC_taint) { - save_magic(mgs_ix, sv); - saved = TRUE; - } + if (mg->mg_type != PERL_MAGIC_taint) { + taint_only = FALSE; + if (!saved) { + save_magic(mgs_ix, sv); + saved = TRUE; + } + } vtbl->svt_get(aTHX_ sv, mg); @@ -210,8 +214,23 @@ Perl_mg_get(pTHX_ SV *sv) ~(SVs_GMG|SVs_SMG|SVs_RMG); } else if (vtbl == &PL_vtbl_utf8) { - /* get-magic can reallocate the PV */ - magic_setutf8(sv, mg); + /* get-magic can reallocate the PV, unless there's only taint + * magic */ + if (taint_only) { + MAGIC *mg2; + for (mg2 = nextmg; mg2; mg2 = mg2->mg_moremagic) { + if ( mg2->mg_type != PERL_MAGIC_taint + && !(mg2->mg_flags & MGf_GSKIP) + && mg2->mg_virtual + && mg2->mg_virtual->svt_get + ) { + taint_only = FALSE; + break; + } + } + } + if (!taint_only) + magic_setutf8(sv, mg); } mg = nextmg; @@ -588,9 +607,9 @@ Perl_mg_free_type(pTHX_ SV *sv, int how) MAGIC *mg, *prevmg, *moremg; PERL_ARGS_ASSERT_MG_FREE_TYPE; for (prevmg = NULL, mg = SvMAGIC(sv); mg; prevmg = mg, mg = moremg) { - MAGIC *newhead; moremg = mg->mg_moremagic; if (mg->mg_type == how) { + MAGIC *newhead; /* temporarily move to the head of the magic chain, in case custom free code relies on this historical aspect of mg_free */ if (prevmg) { @@ -925,7 +944,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) } break; case '\010': /* ^H */ - sv_setiv(sv, (IV)PL_hints); + sv_setuv(sv, PL_hints); break; case '\011': /* ^I */ /* NOT \t in EBCDIC */ sv_setpv(sv, PL_inplace); /* Will undefine sv if PL_inplace is NULL */ @@ -989,7 +1008,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) break; case '\027': /* ^W & $^WARNING_BITS */ if (nextchar == '\0') - sv_setiv(sv, (IV)((PL_dowarn & G_WARN_ON) ? TRUE : FALSE)); + sv_setiv(sv, (IV)cBOOL(PL_dowarn & G_WARN_ON)); else if (strEQ(remaining, "ARNING_BITS")) { if (PL_compiling.cop_warnings == pWARN_NONE) { sv_setpvn(sv, WARN_NONEstring, WARNsize) ; @@ -1119,9 +1138,9 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) #ifdef HAS_GETGROUPS { Groups_t *gary = NULL; - I32 i; I32 num_groups = getgroups(0, gary); if (num_groups > 0) { + I32 i; Newx(gary, num_groups, Groups_t); num_groups = getgroups(num_groups, gary); for (i = 0; i < num_groups; i++) @@ -2130,7 +2149,6 @@ Perl_magic_setpos(pTHX_ SV *sv, MAGIC *mg) SV* const lsv = LvTARG(sv); SSize_t pos; STRLEN len; - STRLEN ulen = 0; MAGIC* found; const char *s; @@ -2152,7 +2170,7 @@ Perl_magic_setpos(pTHX_ SV *sv, MAGIC *mg) pos = SvIV(sv); if (DO_UTF8(lsv)) { - ulen = sv_or_pv_len_utf8(lsv, s, len); + const STRLEN ulen = sv_or_pv_len_utf8(lsv, s, len); if (ulen) len = ulen; } @@ -2718,7 +2736,15 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) PL_maxsysfd = SvIV(sv); break; case '\010': /* ^H */ - PL_hints = SvIV(sv); + { + U32 save_hints = PL_hints; + PL_hints = SvUV(sv); + + /* If wasn't UTF-8, and now is, notify the parser */ + if ((PL_hints & HINT_UTF8) && ! (save_hints & HINT_UTF8)) { + notify_parser_that_changed_to_utf8(); + } + } break; case '\011': /* ^I */ /* NOT \t in EBCDIC */ Safefree(PL_inplace); diff --git a/op.c b/op.c index 118c519..51ffac2 100644 --- a/op.c +++ b/op.c @@ -487,13 +487,13 @@ void Perl_opslab_force_free(pTHX_ OPSLAB *slab) { OPSLAB *slab2; - OPSLOT *slot; #ifdef DEBUGGING size_t savestack_count = 0; #endif PERL_ARGS_ASSERT_OPSLAB_FORCE_FREE; slab2 = slab; do { + OPSLOT *slot; for (slot = slab2->opslab_first; slot->opslot_next; slot = slot->opslot_next) { @@ -994,8 +994,9 @@ Perl_op_clear(pTHX_ OP *o) /* FALLTHROUGH */ case OP_TRANS: case OP_TRANSR: - if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) { - assert(o->op_type == OP_TRANS || o->op_type == OP_TRANSR); + if ( (o->op_type == OP_TRANS || o->op_type == OP_TRANSR) + && (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))) + { #ifdef USE_ITHREADS if (cPADOPo->op_padix > 0) { pad_swipe(cPADOPo->op_padix, TRUE); @@ -1864,7 +1865,6 @@ Perl_scalarvoid(pTHX_ OP *arg) dVAR; OP *kid; SV* sv; - U8 want; SSize_t defer_stack_alloc = 0; SSize_t defer_ix = -1; OP **defer_stack = NULL; @@ -1873,6 +1873,7 @@ Perl_scalarvoid(pTHX_ OP *arg) PERL_ARGS_ASSERT_SCALARVOID; do { + U8 want; SV *useless_sv = NULL; const char* useless = NULL; @@ -4620,27 +4621,86 @@ static OP * S_gen_constant_list(pTHX_ OP *o) { dVAR; - OP *curop; - const SSize_t oldtmps_floor = PL_tmps_floor; + OP *curop, *old_next; + SV * const oldwarnhook = PL_warnhook; + SV * const olddiehook = PL_diehook; + COP *old_curcop; + U8 oldwarn = PL_dowarn; SV **svp; AV *av; + I32 old_cxix; + COP not_compiling; + int ret = 0; + dJMPENV; + bool op_was_null; list(o); if (PL_parser && PL_parser->error_count) return o; /* Don't attempt to run with errors */ curop = LINKLIST(o); + old_next = o->op_next; o->op_next = 0; + op_was_null = o->op_type == OP_NULL; + if (op_was_null) /* b3698342565fb462291fba4b432cfcd05b6eb4e1 */ + o->op_type = OP_CUSTOM; CALL_PEEP(curop); + if (op_was_null) + o->op_type = OP_NULL; S_prune_chain_head(&curop); PL_op = curop; - Perl_pp_pushmark(aTHX); - CALLRUNOPS(aTHX); - PL_op = curop; - assert (!(curop->op_flags & OPf_SPECIAL)); - assert(curop->op_type == OP_RANGE); - Perl_pp_anonlist(aTHX); - PL_tmps_floor = oldtmps_floor; + + old_cxix = cxstack_ix; + create_eval_scope(NULL, G_FAKINGEVAL); + + old_curcop = PL_curcop; + StructCopy(old_curcop, ¬_compiling, COP); + PL_curcop = ¬_compiling; + /* The above ensures that we run with all the correct hints of the + current COP, but that IN_PERL_RUNTIME is true. */ + assert(IN_PERL_RUNTIME); + PL_warnhook = PERL_WARNHOOK_FATAL; + PL_diehook = NULL; + JMPENV_PUSH(ret); + + /* Effective $^W=1. */ + if ( ! (PL_dowarn & G_WARN_ALL_MASK)) + PL_dowarn |= G_WARN_ON; + + switch (ret) { + case 0: + Perl_pp_pushmark(aTHX); + CALLRUNOPS(aTHX); + PL_op = curop; + assert (!(curop->op_flags & OPf_SPECIAL)); + assert(curop->op_type == OP_RANGE); + Perl_pp_anonlist(aTHX); + break; + case 3: + CLEAR_ERRSV(); + o->op_next = old_next; + break; + default: + JMPENV_POP; + PL_warnhook = oldwarnhook; + PL_diehook = olddiehook; + Perl_croak(aTHX_ "panic: gen_constant_list JMPENV_PUSH returned %d", + ret); + } + + JMPENV_POP; + PL_dowarn = oldwarn; + PL_warnhook = oldwarnhook; + PL_diehook = olddiehook; + PL_curcop = old_curcop; + + if (cxstack_ix > old_cxix) { + assert(cxstack_ix == old_cxix + 1); + assert(CxTYPE(CX_CUR()) == CXt_EVAL); + delete_eval_scope(); + } + if (ret) + return o; OpTYPE_set(o, OP_RV2AV); o->op_flags &= ~OPf_REF; /* treat \(1..2) like an ordinary list */ @@ -5783,6 +5843,11 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor) rx_flags |= RXf_SPLIT; } + /* Skip compiling if parser found an error for this pattern */ + if (pm->op_pmflags & PMf_HAS_ERROR) { + return o; + } + if (!has_code || !eng->op_comp) { /* compile-time simple constant pattern */ @@ -10498,10 +10563,10 @@ Perl_ck_smartmatch(pTHX_ OP *o) op_sibling_splice(o, NULL, 0, first); /* Implicitly take a reference to a regular expression */ - if (first->op_type == OP_MATCH) { + if (first->op_type == OP_MATCH && !(first->op_flags & OPf_STACKED)) { OpTYPE_set(first, OP_QR); } - if (second->op_type == OP_MATCH) { + if (second->op_type == OP_MATCH && !(second->op_flags & OPf_STACKED)) { OpTYPE_set(second, OP_QR); } } @@ -10830,7 +10895,6 @@ Perl_ck_require(pTHX_ OP *o) if (o->op_flags & OPf_KIDS) { /* Shall we supply missing .pm? */ SVOP * const kid = (SVOP*)cUNOPo->op_first; - HEK *hek; U32 hash; char *s; STRLEN len; @@ -10840,6 +10904,7 @@ Perl_ck_require(pTHX_ OP *o) if (kid->op_private & OPpCONST_BARE) { dVAR; const char *end; + HEK *hek; if (was_readonly) { SvREADONLY_off(sv); @@ -10882,6 +10947,7 @@ Perl_ck_require(pTHX_ OP *o) } else { dVAR; + HEK *hek; if (was_readonly) SvREADONLY_off(sv); PERL_HASH(hash, s, len); hek = share_hek(s, @@ -10922,7 +10988,7 @@ Perl_ck_return(pTHX_ OP *o) PERL_ARGS_ASSERT_CK_RETURN; kid = OpSIBLING(cLISTOPo->op_first); - if (CvLVALUE(PL_compcv)) { + if (PL_compcv && CvLVALUE(PL_compcv)) { for (; kid; kid = OpSIBLING(kid)) op_lvalue(kid, OP_LEAVESUBLV); } @@ -13118,6 +13184,27 @@ S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints) && ( (o->op_private & OPpDEREF) == OPpDEREF_AV || (o->op_private & OPpDEREF) == OPpDEREF_HV); + /* This doesn't make much sense but is legal: + * @{ local $x[0][0] } = 1 + * Since scope exit will undo the autovivification, + * don't bother in the first place. The OP_LEAVE + * assertion is in case there are other cases of both + * OPpLVAL_INTRO and OPpDEREF which don't include a scope + * exit that would undo the local - in which case this + * block of code would need rethinking. + */ + if (is_deref && (o->op_private & OPpLVAL_INTRO)) { +#ifdef DEBUGGING + OP *n = o->op_next; + while (n && ( n->op_type == OP_NULL + || n->op_type == OP_LIST)) + n = n->op_next; + assert(n && n->op_type == OP_LEAVE); +#endif + o->op_private &= ~OPpDEREF; + is_deref = FALSE; + } + if (is_deref) { ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_PARENS))); @@ -14393,8 +14480,9 @@ Perl_rpeep(pTHX_ OP *o) && ( kid->op_targ == OP_NEXTSTATE || kid->op_targ == OP_DBSTATE )) || kid->op_type == OP_STUB - || kid->op_type == OP_ENTER); - nullop->op_next = kLISTOP->op_next; + || kid->op_type == OP_ENTER + || (PL_parser && PL_parser->error_count)); + nullop->op_next = kid->op_next; DEFER(nullop->op_next); } diff --git a/op.h b/op.h index 90f63e3..5a29bfb 100644 --- a/op.h +++ b/op.h @@ -327,6 +327,10 @@ struct pmop { * other end instead; this preserves binary compatibility. */ #define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+2) +/* Set by the parser if it discovers an error, so the regex shouldn't be + * compiled */ +#define PMf_HAS_ERROR (1U<<(PMf_BASE_SHIFT+4)) + /* 'use re "taint"' in scope: taint $1 etc. if target tainted */ #define PMf_RETAINT (1U<<(PMf_BASE_SHIFT+5)) @@ -475,6 +479,24 @@ struct loop { #define kLOOP cLOOPx(kid) +typedef enum { + OPclass_NULL, /* 0 */ + OPclass_BASEOP, /* 1 */ + OPclass_UNOP, /* 2 */ + OPclass_BINOP, /* 3 */ + OPclass_LOGOP, /* 4 */ + OPclass_LISTOP, /* 5 */ + OPclass_PMOP, /* 6 */ + OPclass_SVOP, /* 7 */ + OPclass_PADOP, /* 8 */ + OPclass_PVOP, /* 9 */ + OPclass_LOOP, /* 10 */ + OPclass_COP, /* 11 */ + OPclass_METHOP, /* 12 */ + OPclass_UNOP_AUX /* 13 */ +} OPclass; + + #ifdef USE_ITHREADS # define cGVOPx_gv(o) ((GV*)PAD_SVl(cPADOPx(o)->op_padix)) # ifndef PERL_CORE @@ -909,7 +931,10 @@ Return a short description of the provided OP. =for apidoc Am|U32|OP_CLASS|OP *o Return the class of the provided OP: that is, which of the *OP structures it uses. For core ops this currently gets the information out -of C, which does not always accurately reflect the type used. +of C, which does not always accurately reflect the type used; +in v5.26 onwards, see also the function C> which can do a better +job of determining the used type. + For custom ops the type is returned from the registration, and it is up to the registree to ensure it is accurate. The value returned will be one of the C* constants from F. diff --git a/parser.h b/parser.h index ad148c2..4187e0a 100644 --- a/parser.h +++ b/parser.h @@ -115,6 +115,8 @@ typedef struct yy_parser { IV sig_optelems; /* number of optional signature elems seen */ char sig_slurpy; /* the sigil of the slurpy var (or null) */ + bool recheck_utf8_validity; + PERL_BITFIELD16 in_pod:1; /* lexer is within a =pod section */ PERL_BITFIELD16 filtered:1; /* source filters in evalbytes */ PERL_BITFIELD16 saw_infix_sigil:1; /* saw & or * or % operator */ diff --git a/patchlevel.h b/patchlevel.h index fbf74f9..152236e 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -15,7 +15,7 @@ #define PERL_REVISION 5 /* age */ #define PERL_VERSION 25 /* epoch */ -#define PERL_SUBVERSION 9 /* generation */ +#define PERL_SUBVERSION 10 /* generation */ /* The following numbers describe the earliest compatible version of Perl ("compatibility" here being defined as sufficient binary/API @@ -36,7 +36,7 @@ */ #define PERL_API_REVISION 5 #define PERL_API_VERSION 25 -#define PERL_API_SUBVERSION 9 +#define PERL_API_SUBVERSION 10 /* XXX Note: The selection of non-default Configure options, such as -Duselonglong may invalidate these settings. Currently, Configure diff --git a/perl.c b/perl.c index 09eb2f4..658f260 100644 --- a/perl.c +++ b/perl.c @@ -1580,7 +1580,6 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, char **env) * the original argv[0]. (See below for 'contiguous', though.) * --jhi */ const char *s = NULL; - int i; const UV mask = ~(UV)(PTRSIZE-1); /* Do the mask check only if the args seem like aligned. */ const UV aligned = @@ -1596,6 +1595,7 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, char **env) * like the argv[] interleaved with some other data, we are * fine. (Did I just evoke Murphy's Law?) --jhi */ if (PL_origargv && PL_origargc >= 1 && (s = PL_origargv[0])) { + int i; while (*s) s++; for (i = 1; i < PL_origargc; i++) { if ((PL_origargv[i] == s + 1 @@ -1629,6 +1629,7 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, char **env) INT2PTR(char *, PTR2UV(s + PTRSIZE) & mask))) ) { + int i; #ifndef OS2 /* ENVIRON is read by the kernel too. */ s = PL_origenviron[0]; while (*s) s++; @@ -2374,12 +2375,7 @@ S_parse_body(pTHX_ char **env, XSINIT_t xsinit) SETERRNO(0,SS_NORMAL); if (yyparse(GRAMPROG) || PL_parser->error_count) { - if (PL_minus_c) - Perl_croak(aTHX_ "%s had compilation errors.\n", PL_origfilename); - else { - Perl_croak(aTHX_ "Execution of %s aborted due to compilation errors.\n", - PL_origfilename); - } + abort_execution("", PL_origfilename); } CopLINE_set(PL_curcop, 0); SET_CURSTASH(PL_defstash); diff --git a/perl.h b/perl.h index d832db4..867c300 100644 --- a/perl.h +++ b/perl.h @@ -3793,7 +3793,7 @@ EXTERN_C int perl_tsa_mutex_unlock(perl_mutex* mutex) # define USEMYBINMODE /**/ # include /* for setmode() prototype */ # define my_binmode(fp, iotype, mode) \ - (PerlLIO_setmode(fileno(fp), mode) != -1 ? TRUE : FALSE) + cBOOL(PerlLIO_setmode(fileno(fp), mode) != -1) #endif #ifdef __CYGWIN__ diff --git a/perlio.c b/perlio.c index ad1c6fe..3e936a2 100644 --- a/perlio.c +++ b/perlio.c @@ -1314,7 +1314,7 @@ PerlIO_binmode(pTHX_ PerlIO *f, int iotype, int mode, const char *names) (for example :unix which is never going to call them) it can do the flush when it is pushed. */ - return PerlIO_apply_layers(aTHX_ f, NULL, names) == 0 ? TRUE : FALSE; + return cBOOL(PerlIO_apply_layers(aTHX_ f, NULL, names) == 0); } else { /* Fake 5.6 legacy of using this call to turn ON O_TEXT */ @@ -1355,7 +1355,7 @@ PerlIO_binmode(pTHX_ PerlIO *f, int iotype, int mode, const char *names) /* Legacy binmode is now _defined_ as being equivalent to pushing :raw So code that used to be here is now in PerlIORaw_pushed(). */ - return PerlIO_push(aTHX_ f, PERLIO_FUNCS_CAST(&PerlIO_raw), NULL, NULL) ? TRUE : FALSE; + return cBOOL(PerlIO_push(aTHX_ f, PERLIO_FUNCS_CAST(&PerlIO_raw), NULL, NULL)); } } @@ -3231,7 +3231,7 @@ PerlIOStdio_invalidate_fileno(pTHX_ FILE *f) structure at all */ # else - f->_file = -1; + PERLIO_FILE_file(f) = -1; # endif return 1; # else diff --git a/perly.c b/perly.c index 1c018bb..9911a8a 100644 --- a/perly.c +++ b/perly.c @@ -347,11 +347,11 @@ Perl_yyparse (pTHX_ int gramtype) * Although it's not designed for this purpose, we can use * NATIVE_TO_UNI here. It returns its argument on ASCII * platforms, and on EBCDIC translates native to ascii in - * the 0-255 range, leaving everything else unchanged. - * This jibes with yylex() returning some bare characters - * in that range, but all tokens it returns are either 0, - * or above 255. There could be a problem if NULs weren't - * 0, or were ever returned as raw chars by yylex() */ + * the 0-255 range, leaving every other possible input + * unchanged. This jibes with yylex() returning some bare + * characters in that range, but all tokens it returns are + * either 0, or above 255. There could be a problem if NULs + * weren't 0, or were ever returned as raw chars by yylex() */ yytoken = YYTRANSLATE(NATIVE_TO_UNI(parser->yychar)); } diff --git a/plan9/config.plan9 b/plan9/config.plan9 index 6b03c08..d2e57b2 100644 --- a/plan9/config.plan9 +++ b/plan9/config.plan9 @@ -3329,8 +3329,8 @@ * This symbol contains the ~name expanded version of PRIVLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define PRIVLIB "/sys/lib/perl/5.25.9" /**/ -#define PRIVLIB_EXP "/sys/lib/perl/5.25.9" /**/ +#define PRIVLIB "/sys/lib/perl/5.25.10" /**/ +#define PRIVLIB_EXP "/sys/lib/perl/5.25.10" /**/ /* PTRSIZE: * This symbol contains the size of a pointer, so that the C preprocessor @@ -3457,9 +3457,9 @@ * removed. The elements in inc_version_list (inc_version_list.U) can * be tacked onto this variable to generate a list of directories to search. */ -#define SITELIB "/sys/lib/perl/5.25.9/site_perl" /**/ -#define SITELIB_EXP "/sys/lib/perl/5.25.9/site_perl" /**/ -#define SITELIB_STEM "/sys/lib/perl/5.25.9/site_perl" /**/ +#define SITELIB "/sys/lib/perl/5.25.10/site_perl" /**/ +#define SITELIB_EXP "/sys/lib/perl/5.25.10/site_perl" /**/ +#define SITELIB_STEM "/sys/lib/perl/5.25.10/site_perl" /**/ /* Size_t_size: * This symbol holds the size of a Size_t in bytes. diff --git a/plan9/config_sh.sample b/plan9/config_sh.sample index c9b4735..dac0cfd 100644 --- a/plan9/config_sh.sample +++ b/plan9/config_sh.sample @@ -32,12 +32,12 @@ alignbytes='4' ansi2knr='' aphostname='/bin/uname -n' api_revision='5' -api_subversion='9' +api_subversion='10' api_version='25' -api_versionstring='5.25.9' +api_versionstring='5.25.10' ar='ar' -archlib='/sys/lib/perl5/5.25.9/386' -archlibexp='/sys/lib/perl5/5.25.9/386' +archlib='/sys/lib/perl5/5.25.10/386' +archlibexp='/sys/lib/perl5/5.25.10/386' archname64='' archname='386' archobjs='' @@ -820,17 +820,17 @@ inc_version_list=' ' inc_version_list_init='0' incpath='' inews='' -installarchlib='/sys/lib/perl/5.25.9/386' +installarchlib='/sys/lib/perl/5.25.10/386' installbin='/usr/bin' installman1dir='/sys/man/1pub' installman3dir='/sys/man/2pub' installprefix='/usr' installprefixexp='/usr' -installprivlib='/sys/lib/perl/5.25.9' +installprivlib='/sys/lib/perl/5.25.10' installscript='/usr/bin' -installsitearch='/sys/lib/perl/5.25.9/site_perl/386' +installsitearch='/sys/lib/perl/5.25.10/site_perl/386' installsitebin='/usr/bin' -installsitelib='/sys/lib/perl/5.25.9/site_perl' +installsitelib='/sys/lib/perl/5.25.10/site_perl' installstyle='lib/perl5' installusrbinperl='undef' installvendorarch='' @@ -955,8 +955,8 @@ pmake='' pr='' prefix='/usr' prefixexp='/usr' -privlib='/sys/lib/perl/5.25.9' -privlibexp='/sys/lib/perl/5.25.9' +privlib='/sys/lib/perl/5.25.10' +privlibexp='/sys/lib/perl/5.25.10' procselfexe='' prototype='define' ptrsize='4' @@ -1021,13 +1021,13 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0' sig_size='50' signal_t='void' -sitearch='/sys/lib/perl/5.25.9/site_perl/386' +sitearch='/sys/lib/perl/5.25.10/site_perl/386' sitearchexp='/sys/lib/perl/site_perl/386' sitebin='/usr/bin' sitebinexp='/usr/bin' -sitelib='/sys/lib/perl/5.25.9/site_perl' -sitelib_stem='/sys/lib/perl/5.25.9/site_perl' -sitelibexp='/sys/lib/perl/5.25.9/site_perl' +sitelib='/sys/lib/perl/5.25.10/site_perl' +sitelib_stem='/sys/lib/perl/5.25.10/site_perl' +sitelibexp='/sys/lib/perl/5.25.10/site_perl' siteprefix='/usr' siteprefixexp='/usr' sizesize='4' @@ -1060,7 +1060,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/sys/include/ape/string.h' submit='' -subversion='9' +subversion='10' sysman='/sys/man/1pub' tail='' tar='' @@ -1142,8 +1142,8 @@ vendorlib_stem='' vendorlibexp='' vendorprefix='' vendorprefixexp='' -version='5.25.9' -version_patchlevel_string='version 25 subversion 9' +version='5.25.10' +version_patchlevel_string='version 25 subversion 10' versiononly='undef' vi='' xlibpth='' @@ -1157,9 +1157,9 @@ config_args='' config_argc=0 PERL_REVISION=5 PERL_VERSION=25 -PERL_SUBVERSION=9 +PERL_SUBVERSION=10 PERL_API_REVISION=5 PERL_API_VERSION=25 -PERL_API_SUBVERSION=9 +PERL_API_SUBVERSION=10 PERL_PATCHLEVEL= PERL_CONFIG_SH=true diff --git a/pod/.gitignore b/pod/.gitignore index 14ac7be..45712a9 100644 --- a/pod/.gitignore +++ b/pod/.gitignore @@ -50,7 +50,7 @@ /roffitall # generated -/perl5259delta.pod +/perl52510delta.pod /perlapi.pod /perlintern.pod /perlmodlib.pod diff --git a/pod/perl.pod b/pod/perl.pod index 7ba8f61..95ab755 100644 --- a/pod/perl.pod +++ b/pod/perl.pod @@ -181,6 +181,7 @@ aux h2ph h2xs perlbug pl2pm pod2html pod2man splain xsubpp perlhist Perl history records perldelta Perl changes since previous version + perl5259delta Perl changes in version 5.25.9 perl5258delta Perl changes in version 5.25.8 perl5257delta Perl changes in version 5.25.7 perl5256delta Perl changes in version 5.25.6 diff --git a/pod/perl5259delta.pod b/pod/perl5259delta.pod new file mode 100644 index 0000000..230a0b3 --- /dev/null +++ b/pod/perl5259delta.pod @@ -0,0 +1,678 @@ +=encoding utf8 + +=head1 NAME + +perl5259delta - what is new for perl v5.25.9 + +=head1 DESCRIPTION + +This document describes differences between the 5.25.8 release and the 5.25.9 +release. + +If you are upgrading from an earlier release such as 5.25.7, first read +L, which describes differences between 5.25.7 and 5.25.8. + +=head1 Core Enhancements + +=head2 New regular expression modifier C + +Specifying two C characters to modify a regular expression pattern +does everything that a single one does, but additionally TAB and SPACE +characters within a bracketed character class are generally ignored and +can be added to improve readability, like +S>. Details are at +Lx and Exx>. + +=head1 Deprecations + +=head2 String delimiters that aren't stand-alone graphemes are now deprecated + +In order for Perl to eventually allow string delimiters to be Unicode +grapheme clusters (which look like a single character, but may be +a sequence of several ones), we have to stop allowing a single char +delimiter that isn't a grapheme by itself. These are unlikely to exist +in actual code, as they would typically display as attached to the +character in front of them. + +=head1 Performance Enhancements + +=over 4 + +=item * + +A hash in boolean context is now sometimes faster, e.g. + + if (!%h) { ... } + +This was already special-cased, but some cases were missed, and even the +ones which weren't have been improved. + +=item * + +Several other ops may now also be faster in boolean context. + +=back + +=head1 Modules and Pragmata + +=head2 Updated Modules and Pragmata + +=over 4 + +=item * + +L has been upgraded from version 0.28 to 0.29. + +The deprecation message for the C<:unique> and C<:locked> attributes +now mention they will disappear in Perl 5.28. + +=item * + +L has been upgraded from version 1.39 to 1.40. + +=item * + +L has been upgraded from version 1.05 to 1.06. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 2.069 to 2.070. + +=item * + +L has been upgraded from version 2.069 to 2.070. + +=item * + +L has been upgraded from version 2.14_01 to 2.16. + +=item * + +L was upgraded from version 2.166 to 2.167. + +This fixes a stack management bug. [perl #130487]. + +=item * + +L has been upgraded from version 1.838 to 1.840. + +=item * + +L has been upgraded from version 1.05 to 1.06. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 1.35 to 1.36. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 1.40 to 1.41. + +=item * + +L has been upgraded from version 1.27 to 1.28. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 1.33 to 1.34. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 1.45 to 1.46. + +Fixes the Unicode Bug in the range operator. + +=item * + +L has been upgraded from version 1.27 to 1.28. + +Issue a deprecation message for C. + +=item * + +L has been upgraded from version 0.41 to 0.42. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 0.63 to 0.64. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 5.20161220 to 5.20170120. + +=item * + +L has been upgraded from version 1.11 to 1.12. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 1.27 to 1.28. + +=item * + +L has been upgraded from version 1.50 to 1.51. + +Ignore F on non-Unix systems. [perl #113960] + +=item * + +L has been upgraded from version 0.25 to 0.26. + +=item * + +L has been upgraded from version 1.2201 to 1.2202. + +=item * + +L has been upgraded from version 0.33 to 0.34 + +This adds support for the new Lxx>|perlre/Ex and Exx> +regular expression pattern modifier, and a change to the L>|re/'strict' mode> experimental feature. When S> is enabled, a warning now will be generated for all +unescaped uses of the two characters C<}> and C<]> in regular +expression patterns (outside bracketed character classes) that are taken +literally. This brings them more in line with the C<)> character which +is always a metacharacter unless escaped. Being a metacharacter only +sometimes, depending on action at a distance, can lead to silently +having the pattern mean something quite different than was intended, +which the S> mode is intended to minimize. + +=item * + +L has been upgraded from version 2.59 to 2.61. + +Fixes [perl #130098]. + +=item * + +L has been upgraded from version 1.07 to 1.08. + +=item * + +L has been upgraded from version 1.15 to 1.16. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 1.29 to 1.30. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 2.10 to 2.12. + +Fixes [perl #130469]. + +=item * + +L has been upgraded from version 1.52 to 1.54. + +This fixes [cpan #119529], [perl #130457] + +=item * + +L has been upgraded from version 0.67 to 0.68. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 1.07 to 1.08. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=item * + +L has been upgraded from version 0.24 to 0.26. + +It now uses 3-arg C instead of 2-arg C. [perl #130122] + +=back + +=head1 Documentation + +=head2 New Documentation + +=head3 L + +This file documents all upcoming deprecations, and some of the deprecations +which already have been removed. The purpose of this documentation is +two-fold: document what will disappear, and by which version, and serve +as a guide for people dealing with code which has features that no longer +work after an upgrade of their perl. + +=head1 Diagnostics + +The following additions or changes have been made to diagnostic output, +including warnings and fatal error messages. For the complete list of +diagnostic messages, see L. + +=head2 New Diagnostics + +=head3 New Warnings + +=over 4 + +=item * + +L + +See L + +=back + +=head2 Changes to Existing Diagnostics + +As of Perl 5.25.9, all new deprecations will come with a version in +which the feature will disappear. And with a few exceptions, most +existing deprecations will state when they'll disappear. As such, most +deprecation messages have changed. + +=over 4 + +=item * + +Attribute "locked" is deprecated, and will disappear in Perl 5.28 + +=item * + +Attribute "unique" is deprecated, and will disappear in Perl 5.28 + +=item * + +"\c%c" is more clearly written simply as "%s". This will be a fatal error +in Perl 5.28 + +=item * + +Constants from lexical variables potentially modified elsewhere are +deprecated. This will not be allowed in Perl 5.32 + +=item * + +Deprecated use of my() in false conditional. This will be a fatal error +in Perl 5.30 + +=item * + +dump() better written as CORE::dump(). dump() will no longer be available +in Perl 5.30 + +=item * + +${^ENCODING} is no longer supported. Its use will be fatal in Perl 5.28 + +=item * + +File::Glob::glob() will disappear in perl 5.30. Use File::Glob::bsd_glob() +instead. + +=item * + +%s() is deprecated on :utf8 handles. This will be a fatal error in Perl 5.30 + +=item * + +$* is no longer supported. Its use will be fatal in Perl 5.30 + +=item * + +$* is no longer supported. Its use will be fatal in Perl 5.30 + +=item * + +Opening dirhandle %s also as a file. This will be a fatal error in Perl 5.28 + +=item * + +Opening filehandle %s also as a directory. This will be a fatal +error in Perl 5.28 + +=item * + +Setting $/ to a reference to %s as a form of slurp is deprecated, +treating as undef. This will be fatal in Perl 5.28 + +=item * + +Unescaped left brace in regex is deprecated here (and will be fatal +in Perl 5.30), passed through in regex; marked by S<< E-- HERE >> in m/%s/ + +=item * + +Unknown charname '' is deprecated. Its use will be fatal in Perl 5.28 + +=item * + +Use of bare EE to mean EE"" is deprecated. Its use will be fatal in Perl 5.28 + +=item * + +Use of code point 0x%s is deprecated; the permissible max is 0x%s. +This will be fatal in Perl 5.28 + +=item * + +Use of comma-less variable list is deprecated. Its use will be fatal +in Perl 5.28 + +=item * + +Use of inherited AUTOLOAD for non-method %s() is deprecated. This +will be fatal in Perl 5.28 + +=item * + +Use of strings with code points over 0xFF as arguments to %s operator +is deprecated. This will be a fatal error in Perl 5.28 + +=back + +=head1 Utility Changes + +=head2 F and F + +=over 4 + +=item * + +These old utilities have long since superceded by L, and are +now gone from the distribution. + +=back + +=head2 F + +=over 4 + +=item * + +Many improvements + +=back + + +=head1 Configuration and Compilation + +=over 4 + +=item * + +The C build process has further changes: + +=over + +=item * + +If the C<-xnolibs> is available, use that so a F perl can be +built within a FreeBSD jail. + +=item * + +On systems that build a dtrace object file (FreeBSD, Solaris and +SystemTap's dtrace emulation), copy the input objects to a separate +directory and process them there, and use those objects in the link, +since C also modifies these objects. + +=item * + +Add libelf to the build on FreeBSD 10.x, since dtrace adds references +to libelf symbols. + +=item * + +Generate a dummy dtrace_main.o if C fails to build it. A +default build on Solaris generates probes from the unused inline +functions, while they don't on FreeBSD, which causes C to +fail. + +=back + +[perl #130108] + +=item * + +You can now disable perl's use of the PERL_HASH_SEED and +PERL_PERTURB_KEYS environment variables by configuring perl with +C<-Accflags=NO_PERL_HASH_ENV>. + +=item * + +You can now disable perl's use of the PERL_HASH_SEED_DEBUG environment +variable by configuring perl with +C<-Accflags=-DNO_PERL_HASH_SEED_DEBUG>. + +=back + +=head1 Internal Changes + +=over 4 + +=item * + +New versions of macros like C and C have +been added, each with the +suffix C<_safe>, like C. These take an extra +parameter, giving an upper limit of how far into the string it is safe +to read. Using the old versions could cause attempts to read beyond the +end of the input buffer if the UTF-8 is not well-formed, and their use +now raises a deprecation warning. Details are at +L. + +=item * + +Calling macros like C on malformed UTF-8 have issued a +deprecation warning since Perl v5.18. They now die. +Similarly, macros like C on malformed UTF-8 now die. + +=item * + +Calling the functions C and its derivatives, while +passing a string length of 0 is now asserted against in DEBUGGING +builds, and otherwise returns the Unicode REPLACEMENT CHARACTER. If +you have nothing to decode, you shouldn't call the decode function. + +=item * + +The functions C and its derivatives now return the +Unicode REPLACEMENT CHARACTER if called with UTF-8 that has the overlong +malformation, and that malformation is allowed by the input parameters. +This malformation is where the UTF-8 looks valid syntactically, but +there is a shorter sequence that yields the same code point. This has +been forbidden since Unicode version 3.1. + +=item * + +The functions C and its derivatives now accept an input +flag to allow the overflow malformation. This malformation is when the +UTF-8 may be syntactically valid, but the code point it represents is +not capable of being represented in the word length on the platform. +What "allowed" means in this case is that the function doesn't return an +error, and advances the parse pointer to beyond the UTF-8 in question, +but it returns the Unicode REPLACEMENT CHARACTER as the value of the +code point (since the real value is not representable). + +=back + +=head1 Selected Bug Fixes + +=over 4 + +=item * + +Under C, the entire Perl program is now checked that the UTF-8 +is wellformed. This resolves [perl #126310]. + +=item * + +The range operator C<..> on strings now handles its arguments correctly when in +the scope of the L<< C|feature/"The 'unicode_strings' feature" >> +feature. The previous behaviour was sufficiently unexpected that we believe no +correct program could have made use of it. + +=item * + +The S operator did not ensure enough space was allocated for +its return value in scalar context. It could then write a single +pointer immediately beyond the end of the memory block allocated for +the stack. [perl #130262] + +=item * + +Using a large code point with the C pack template character with +the current output position aligned at just the right point could +cause a write a single zero byte immediately beyond the end of an +allocated buffer. [perl #129149] + +=item * + +Supplying the form picture argument as part of the form argument list +where the picture specifies modifying the argument could cause an +access to the new freed compiled form. [perl #129125] + +=item * + +Fix a problem with sort's build-in compare, where it would not sort +correctly with 64-bit integers, and non-long doubles. [perl #130335] + +=item * + +Fix issues with /(?{ ... EEEOF })/ that broke Method-Signatures. [perl #130398] + +=item * + +Fix a macro which caused syntax error on an EBCDIC build. + +=item * + +Prevent tests from getting hung up on 'NonStop' option. [perl #130445] + +=item * + +Fixed an assertion failure with C and C, which +could be triggered by C. [perl #130198]. + +=item * + +Fixed a comment skipping error under C; it could stop skipping a +byte early, which could be in the middle of a UTF-8 character. +[perl #130495]. + +=item * + +F now ignores F on non-Unix systems. [perl #113960]; + +=item * + +Fix assertion failure for C<{}-E$x> when C<$x> isn't defined. [perl #130496]. + +=item * + +DragonFly BSD now has support for setproctitle(). [perl #130068]. + +=item * + +Fix an assertion error which could be triggered when lookahead string +in patterns exceeded a minimum length. [perl #130522]. + +=item * + +Only warn once per literal about a misplaced C<_>. [perl #70878]. + +=item * + +Ensure range-start is set after error in C. [perl #129342]. + +=item * + +Don't read past start of string for unmatched backref; otherwise, +we may have heap buffer overflow. [perl #129377]. + +=item * + +Properly recognize mathematical digit ranges starting at U+1D7E. +C is supposed to warn if you use a range whose start +and end digit aren't from the same group of 10. It didn't do that +for five groups of mathematical digits starting at U+1D7E. + +=back + + +=head1 Acknowledgements + +Perl 5.25.9 represents approximately 4 weeks of development since Perl 5.25.8 +and contains approximately 24,000 lines of changes across 400 files from 23 +authors. + +Excluding auto-generated files, documentation and release tools, there were +approximately 17,000 lines of changes to 220 .pm, .t, .c and .h files. + +Perl continues to flourish into its third decade thanks to a vibrant community +of users and developers. The following people are known to have contributed the +improvements that became Perl 5.25.9: + +Aaron Crane, Abigail, Andreas König, Andy Lester, Aristotle Pagaltzis +Chris 'BinGOs' Williams, Craig A. Berry, Dagfinn Ilmari MannsÃ¥ker, Dan Collins, +David Mitchell, Father Chrysostomos, Hugo van der Sanden, James E Keenan, +Jerry D. Hedden, John Lightsey, Karl Williamson, Paul Marquess, Peter Avalos, +Sawyer X, Steve Hay, Tomasz Konojacki, Tony Cook, Zefram. + +The list above is almost certainly incomplete as it is automatically generated +from version control history. In particular, it does not include the names of +the (very much appreciated) contributors who reported issues to the Perl bug +tracker. + +Many of the changes included in this version originated in the CPAN modules +included in Perl's core. We're grateful to the entire CPAN community for +helping Perl to flourish. + +For a more complete list of all of Perl's historical contributors, please see +the F file in the Perl source distribution. + +=head1 Reporting Bugs + +If you find what you think is a bug, you might check the perl bug database +at L . There may also be information at +L , the Perl Home Page. + +If you believe you have an unreported bug, please run the L program +included with your release. Be sure to trim your bug down to a tiny but +sufficient test case. Your bug report, along with the output of C, +will be sent off to perlbug@perl.org to be analysed by the Perl porting team. + +If the bug you are reporting has security implications which make it +inappropriate to send to a publicly archived mailing list, then see +L +for details of how to report the issue. + +=head1 SEE ALSO + +The F file for an explanation of how to view exhaustive details on +what changed. + +The F file for how to build Perl. + +The F file for general stuff. + +The F and F files for copyright information. + +=cut diff --git a/pod/perldelta.pod b/pod/perldelta.pod index cab65be..df50ab1 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -2,56 +2,15 @@ =head1 NAME -perldelta - what is new for perl v5.25.9 +perldelta - what is new for perl v5.25.10 =head1 DESCRIPTION -This document describes differences between the 5.25.8 release and the 5.25.9 +This document describes differences between the 5.25.9 release and the 5.25.10 release. -If you are upgrading from an earlier release such as 5.25.7, first read -L, which describes differences between 5.25.7 and 5.25.8. - -=head1 Core Enhancements - -=head2 New regular expression modifier C - -Specifying two C characters to modify a regular expression pattern -does everything that a single one does, but additionally TAB and SPACE -characters within a bracketed character class are generally ignored and -can be added to improve readability, like -S>. Details are at -Lx and Exx>. - -=head1 Deprecations - -=head2 String delimiters that aren't stand-alone graphemes are now deprecated - -In order for Perl to eventually allow string delimiters to be Unicode -grapheme clusters (which look like a single character, but may be -a sequence of several ones), we have to stop allowing a single char -delimiter that isn't a grapheme by itself. These are unlikely to exist -in actual code, as they would typically display as attached to the -character in front of them. - -=head1 Performance Enhancements - -=over 4 - -=item * - -A hash in boolean context is now sometimes faster, e.g. - - if (!%h) { ... } - -This was already special-cased, but some cases were missed, and even the -ones which weren't have been improved. - -=item * - -Several other ops may now also be faster in boolean context. - -=back +If you are upgrading from an earlier release such as 5.25.8, first read +L, which describes differences between 5.25.8 and 5.25.9. =head1 Modules and Pragmata @@ -61,406 +20,108 @@ Several other ops may now also be faster in boolean context. =item * -L has been upgraded from version 0.28 to 0.29. - -The deprecation message for the C<:unique> and C<:locked> attributes -now mention they will disappear in Perl 5.28. - -=item * - -L has been upgraded from version 1.39 to 1.40. - -=item * - -L has been upgraded from version 1.05 to 1.06. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] - -=item * - -L has been upgraded from version 2.069 to 2.070. - -=item * - -L has been upgraded from version 2.069 to 2.070. - -=item * - -L has been upgraded from version 2.14_01 to 2.16. - -=item * - -L was upgraded from version 2.166 to 2.167. - -This fixes a stack management bug. [perl #130487]. - -=item * - -L has been upgraded from version 1.838 to 1.840. - -=item * - -L has been upgraded from version 1.05 to 1.06. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] - -=item * - -L has been upgraded from version 1.35 to 1.36. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] - -=item * - -L has been upgraded from version 1.40 to 1.41. - -=item * - -L has been upgraded from version 1.27 to 1.28. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] - -=item * - -L has been upgraded from version 1.33 to 1.34. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] - -=item * - -L has been upgraded from version 1.45 to 1.46. - -Fixes the Unicode Bug in the range operator. - -=item * - -L has been upgraded from version 1.27 to 1.28. - -Issue a deprecation message for C. - -=item * - -L has been upgraded from version 0.41 to 0.42. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] - -=item * - -L has been upgraded from version 0.63 to 0.64. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] - -=item * - -L has been upgraded from version 5.20161220 to 5.20170120. - -=item * - -L has been upgraded from version 1.11 to 1.12. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] - -=item * - -L has been upgraded from version 1.27 to 1.28. - -=item * - -L has been upgraded from version 1.50 to 1.51. - -Ignore F on non-Unix systems. [perl #113960] - -=item * - -L has been upgraded from version 0.25 to 0.26. - -=item * - -L has been upgraded from version 1.2201 to 1.2202. - -=item * - -L has been upgraded from version 0.33 to 0.34 - -This adds support for the new Lxx>|perlre/Ex and Exx> -regular expression pattern modifier, and a change to the L>|re/'strict' mode> experimental feature. When S> is enabled, a warning now will be generated for all -unescaped uses of the two characters C<}> and C<]> in regular -expression patterns (outside bracketed character classes) that are taken -literally. This brings them more in line with the C<)> character which -is always a metacharacter unless escaped. Being a metacharacter only -sometimes, depending on action at a distance, can lead to silently -having the pattern mean something quite different than was intended, -which the S> mode is intended to minimize. +L has been upgraded from version 1.65 to 1.68. =item * -L has been upgraded from version 2.59 to 2.61. - -Fixes [perl #130098]. +L has been upgraded from version 2.16 to 2.17. =item * -L has been upgraded from version 1.07 to 1.08. +L has been upgraded from version 1.46 to 1.47. =item * -L has been upgraded from version 1.15 to 1.16. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] +L has been upgraded from version 1.11 to 1.12. =item * -L has been upgraded from version 1.29 to 1.30. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] +L has been upgraded from version 5.20170120 to 5.20170220. =item * -L has been upgraded from version 2.10 to 2.12. - -Fixes [perl #130469]. +L has been upgraded from version 1.10 to 1.11. =item * -L has been upgraded from version 1.52 to 1.54. - -This fixes [cpan #119529], [perl #130457] +L has been upgraded from version 1.09 to 1.10. =item * -L has been upgraded from version 0.67 to 0.68. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] +L has been upgraded from version 2.61 to 2.62. =item * -L has been upgraded from version 1.07 to 1.08. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] +L has been upgraded from version 3.11 to 3.12. =item * -L has been upgraded from version 0.24 to 0.26. - -It now uses 3-arg C instead of 2-arg C. [perl #130122] +L has been upgraded from version 2.12 to 2.13. =back -=head1 Documentation - -=head2 New Documentation - -=head3 L - -This file documents all upcoming deprecations, and some of the deprecations -which already have been removed. The purpose of this documentation is -two-fold: document what will disappear, and by which version, and serve -as a guide for people dealing with code which has features that no longer -work after an upgrade of their perl. - =head1 Diagnostics The following additions or changes have been made to diagnostic output, including warnings and fatal error messages. For the complete list of diagnostic messages, see L. -=head2 New Diagnostics - -=head3 New Warnings - -=over 4 - -=item * - -L - -See L - -=back - =head2 Changes to Existing Diagnostics -As of Perl 5.25.9, all new deprecations will come with a version in -which the feature will disappear. And with a few exceptions, most -existing deprecations will state when they'll disappear. As such, most -deprecation messages have changed. - =over 4 =item * -Attribute "locked" is deprecated, and will disappear in Perl 5.28 - -=item * - -Attribute "unique" is deprecated, and will disappear in Perl 5.28 +Use of unassigned code point or non-standalone grapheme for a delimiter will be a fatal error starting in Perl 5.30 -=item * - -"\c%c" is more clearly written simply as "%s". This will be a fatal error -in Perl 5.28 - -=item * - -Constants from lexical variables potentially modified elsewhere are -deprecated. This will not be allowed in Perl 5.32 - -=item * - -Deprecated use of my() in false conditional. This will be a fatal error -in Perl 5.30 - -=item * - -dump() better written as CORE::dump(). dump() will no longer be available -in Perl 5.30 - -=item * - -${^ENCODING} is no longer supported. Its use will be fatal in Perl 5.28 - -=item * - -File::Glob::glob() will disappear in perl 5.30. Use File::Glob::bsd_glob() -instead. - -=item * - -%s() is deprecated on :utf8 handles. This will be a fatal error in Perl 5.30 - -=item * - -$* is no longer supported. Its use will be fatal in Perl 5.30 - -=item * - -$* is no longer supported. Its use will be fatal in Perl 5.30 - -=item * - -Opening dirhandle %s also as a file. This will be a fatal error in Perl 5.28 - -=item * - -Opening filehandle %s also as a directory. This will be a fatal -error in Perl 5.28 - -=item * - -Setting $/ to a reference to %s as a form of slurp is deprecated, -treating as undef. This will be fatal in Perl 5.28 - -=item * - -Unescaped left brace in regex is deprecated here (and will be fatal -in Perl 5.30), passed through in regex; marked by S<< E-- HERE >> in m/%s/ - -=item * - -Unknown charname '' is deprecated. Its use will be fatal in Perl 5.28 - -=item * - -Use of bare EE to mean EE"" is deprecated. Its use will be fatal in Perl 5.28 - -=item * - -Use of code point 0x%s is deprecated; the permissible max is 0x%s. -This will be fatal in Perl 5.28 - -=item * - -Use of comma-less variable list is deprecated. Its use will be fatal -in Perl 5.28 - -=item * - -Use of inherited AUTOLOAD for non-method %s() is deprecated. This -will be fatal in Perl 5.28 +This was changed to drop a leading C in C, so it uses the same +style as other deprecation messages. =item * -Use of strings with code points over 0xFF as arguments to %s operator -is deprecated. This will be a fatal error in Perl 5.28 - -=back - -=head1 Utility Changes - -=head2 F and F - -=over 4 - -=item * - -These old utilities have long since superceded by L, and are -now gone from the distribution. - -=back - -=head2 F - -=over 4 - -=item * +"\c%c" is more clearly written simply as "%s". -Many improvements +It was decided to undeprecate the use of "\c%c", see L =back +=head1 Platform Support -=head1 Configuration and Compilation +=head2 Platform-Specific Notes =over 4 -=item * - -The C build process has further changes: - -=over +=item Windows -=item * - -If the C<-xnolibs> is available, use that so a F perl can be -built within a FreeBSD jail. +=over 4 =item * -On systems that build a dtrace object file (FreeBSD, Solaris and -SystemTap's dtrace emulation), copy the input objects to a separate -directory and process them there, and use those objects in the link, -since C also modifies these objects. - -=item * +Support for compiling perl on Windows using Microsoft Visual Studio 2015 +(containing Visual C++ 14.0) has been added. -Add libelf to the build on FreeBSD 10.x, since dtrace adds references -to libelf symbols. +This version of VC++ includes a completely rewritten C run-time library, some +of the changes in which mean that work done to resolve a socket close() bug in +perl #120091 and perl #118059 is not workable in its current state with this +version of VC++. Therefore, we have effectively reverted that bug fix for +VS2015 onwards on the basis that being able to build with VS2015 onwards is +more important than keeping the bug fix. We may revisit this in the future to +attempt to fix the bug again in a way that is compatible with VS2015. -=item * +These changes do not affect compilation with GCC or with Visual Studio versions +up to and including VS2013, i.e. the bug fix is retained (unchanged) for those +compilers. -Generate a dummy dtrace_main.o if C fails to build it. A -default build on Solaris generates probes from the unused inline -functions, while they don't on FreeBSD, which causes C to -fail. +Note that you may experience compatibility problems if you mix a perl built +with GCC or VS E= VS2013 with XS modules built with VS2015, or if you mix a +perl built with VS2015 with XS modules built with GCC or VS E= VS2013. +Some incompatibility may arise because of the bug fix that has been reverted +for VS2015 builds of perl, but there may well be incompatibility anyway because +of the rewritten CRT in VS2015 (e.g. see discussion at +http://stackoverflow.com/questions/30412951). =back -[perl #130108] - -=item * - -You can now disable perl's use of the PERL_HASH_SEED and -PERL_PERTURB_KEYS environment variables by configuring perl with -C<-Accflags=NO_PERL_HASH_ENV>. - -=item * - -You can now disable perl's use of the PERL_HASH_SEED_DEBUG environment -variable by configuring perl with -C<-Accflags=-DNO_PERL_HASH_SEED_DEBUG>. - =back =head1 Internal Changes @@ -469,47 +130,18 @@ C<-Accflags=-DNO_PERL_HASH_SEED_DEBUG>. =item * -New versions of macros like C and C have -been added, each with the -suffix C<_safe>, like C. These take an extra -parameter, giving an upper limit of how far into the string it is safe -to read. Using the old versions could cause attempts to read beyond the -end of the input buffer if the UTF-8 is not well-formed, and their use -now raises a deprecation warning. Details are at -L. - -=item * - -Calling macros like C on malformed UTF-8 have issued a -deprecation warning since Perl v5.18. They now die. -Similarly, macros like C on malformed UTF-8 now die. +The C API function has been added. This is like the existing +C macro, but can more accurately determine what struct an op +has been allocated as. For example C might return +C indicating that ops of this type are usually +allocated as an C or C; while C will return +C or C as appropriate. =item * -Calling the functions C and its derivatives, while -passing a string length of 0 is now asserted against in DEBUGGING -builds, and otherwise returns the Unicode REPLACEMENT CHARACTER. If -you have nothing to decode, you shouldn't call the decode function. - -=item * - -The functions C and its derivatives now return the -Unicode REPLACEMENT CHARACTER if called with UTF-8 that has the overlong -malformation, and that malformation is allowed by the input parameters. -This malformation is where the UTF-8 looks valid syntactically, but -there is a shorter sequence that yields the same code point. This has -been forbidden since Unicode version 3.1. - -=item * - -The functions C and its derivatives now accept an input -flag to allow the overflow malformation. This malformation is when the -UTF-8 may be syntactically valid, but the code point it represents is -not capable of being represented in the word length on the platform. -What "allowed" means in this case is that the function doesn't return an -error, and advances the parse pointer to beyond the UTF-8 in question, -but it returns the Unicode REPLACEMENT CHARACTER as the value of the -code point (since the real value is not representable). +The output format of the C function (as used by C) +has changed: it now displays an "ASCII-art" tree structure, and shows more +low-level details about each op, such as its address and class. =back @@ -519,122 +151,71 @@ code point (since the real value is not representable). =item * -Under C, the entire Perl program is now checked that the UTF-8 -is wellformed. This resolves [perl #126310]. - -=item * - -The range operator C<..> on strings now handles its arguments correctly when in -the scope of the L<< C|feature/"The 'unicode_strings' feature" >> -feature. The previous behaviour was sufficiently unexpected that we believe no -correct program could have made use of it. - -=item * - -The S operator did not ensure enough space was allocated for -its return value in scalar context. It could then write a single -pointer immediately beyond the end of the memory block allocated for -the stack. [perl #130262] - -=item * - -Using a large code point with the C pack template character with -the current output position aligned at just the right point could -cause a write a single zero byte immediately beyond the end of an -allocated buffer. [perl #129149] - -=item * - -Supplying the form picture argument as part of the form argument list -where the picture specifies modifying the argument could cause an -access to the new freed compiled form. [perl #129125] - -=item * - -Fix a problem with sort's build-in compare, where it would not sort -correctly with 64-bit integers, and non-long doubles. [perl #130335] - -=item * - -Fix issues with /(?{ ... EEEOF })/ that broke Method-Signatures. [perl #130398] - -=item * - -Fix a macro which caused syntax error on an EBCDIC build. - -=item * - -Prevent tests from getting hung up on 'NonStop' option. [perl #130445] - -=item * - -Fixed an assertion failure with C and C, which -could be triggered by C. [perl #130198]. - -=item * - -Fixed a comment skipping error under C; it could stop skipping a -byte early, which could be in the middle of a UTF-8 character. -[perl #130495]. +Attempting to use the deprecated variable C<$#> as the object in an +indirect object method call could cause a heap use after free or +buffer overflow. [perl #129274] =item * -F now ignores F on non-Unix systems. [perl #113960]; +When checking for an indirect object method call in some rare cases +the parser could reallocate the line buffer but then continue to use +pointers to the old buffer. [perl #129190] =item * -Fix assertion failure for C<{}-E$x> when C<$x> isn't defined. [perl #130496]. +Supplying a glob as the format argument to L would +cause an assertion failure. [perl #130722] =item * -DragonFly BSD now has support for setproctitle(). [perl #130068]. +Code like C< $value1 =~ qr/.../ ~~ $value2 > would have the match +converted into a qr// operator, leaving extra elements on the stack to +confuse any surrounding expression. [perl #130705] =item * -Fix an assertion error which could be triggered when lookahead string -in patterns exceeded a minimum length. [perl #130522]. +Since 5.24.0 in some obscure cases, a regex which included code blocks +from multiple sources (e.g. via embedded via qr// objects) could end up +with the wrong current pad and crash or give weird results. [perl #129881] =item * -Only warn once per literal about a misplaced C<_>. [perl #70878]. +Occasionally Cs in a code block within a patterns weren't being +undone when the pattern matching backtracked over the code block. +[perl #126697] =item * -Ensure range-start is set after error in C. [perl #129342]. +Using C to modify a magic variable could access freed memory +in some cases. [perl #129340] =item * -Don't read past start of string for unmatched backref; otherwise, -we may have heap buffer overflow. [perl #129377]. - -=item * - -Properly recognize mathematical digit ranges starting at U+1D7E. -C is supposed to warn if you use a range whose start -and end digit aren't from the same group of 10. It didn't do that -for five groups of mathematical digits starting at U+1D7E. +Perl 5.25.9 was fixed so that under C, the entire Perl program +is checked that the UTF-8 is wellformed. It turns out that several edge +cases were missed, and are now fixed. [perl #126310] was the original +ticket. =back - =head1 Acknowledgements -Perl 5.25.9 represents approximately 4 weeks of development since Perl 5.25.8 -and contains approximately 24,000 lines of changes across 400 files from 23 +Perl 5.25.10 represents approximately 4 weeks of development since Perl 5.25.9 +and contains approximately 12,000 lines of changes across 200 files from 25 authors. Excluding auto-generated files, documentation and release tools, there were -approximately 17,000 lines of changes to 220 .pm, .t, .c and .h files. +approximately 6,700 lines of changes to 130 .pm, .t, .c and .h files. Perl continues to flourish into its third decade thanks to a vibrant community of users and developers. The following people are known to have contributed the -improvements that became Perl 5.25.9: +improvements that became Perl 5.25.10: -Aaron Crane, Abigail, Andreas König, Andy Lester, Aristotle Pagaltzis -Chris 'BinGOs' Williams, Craig A. Berry, Dagfinn Ilmari MannsÃ¥ker, Dan Collins, -David Mitchell, Father Chrysostomos, Hugo van der Sanden, James E Keenan, -Jerry D. Hedden, John Lightsey, Karl Williamson, Paul Marquess, Peter Avalos, -Sawyer X, Steve Hay, Tomasz Konojacki, Tony Cook, Zefram. +Aaron Crane, Abigail, Andreas König, Andy Lester, Chris 'BinGOs' Williams, +Christian Millour, Colin Newell, Dagfinn Ilmari MannsÃ¥ker, David Mitchell, +Hugo van der Sanden, James E Keenan, Jarkko Hietaniemi, Jerry D. Hedden, John +Lightsey, Karl Williamson, Neil Bowers, Pali, Renee Baecker, Sawyer X, Sergey +Aleynikov, Steffen Müller, Steve Hay, Tony Cook, Yves Orton, Zefram. The list above is almost certainly incomplete as it is automatically generated from version control history. In particular, it does not include the names of diff --git a/pod/perldeprecation.pod b/pod/perldeprecation.pod index 0a13504..1401bfb 100644 --- a/pod/perldeprecation.pod +++ b/pod/perldeprecation.pod @@ -340,25 +340,6 @@ C. This feature was deprecated in Perl 5.004, and will be fatal in Perl 5.28. -=head3 Use of C<< \cI >> to specify a printable character. - -In a double quoted context, Perl has the C<< \c >> construct to write -control characters in a readable way. For instance, the tab character -can be written as C<< \cI >> (I<< control-I >>), and the escape -character can be written as C<< \c[ >>. - -Due to implementation details, the C<< \c >> construct can be used -to create regular, printable, characters as well. For instance, -C<< \c >> maps a C<< , >> to C<< l >>; that is C<< \c, >> is an -obscure way of writing C<< l >>. And not only that, it's also not -portable between ASCII and EBCDIC platforms. - -Using the C<< \c >> construct with an argument which maps to a -printable character was deprecated in Perl 5.14, and will be a fatal -error in Perl 5.28. You're recommended to just write the intended -character instead. - - =head3 Use of code points over 0xFF in string bitwise operators The string bitwise operators, C<&>, C<|>, C<^>, and C<~>, treat diff --git a/pod/perldiag.pod b/pod/perldiag.pod index afdcb73..390ba81 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -839,6 +839,13 @@ C loop nor a C block. (Note that this error is issued on exit from the C block, so you won't get the error if you use an explicit C.) +=item Can't determine class of operator %s, assuming BASEOP + +(S) This warning indicates something wrong in the internals of perl. +Perl was trying to find the class (e.g. LISTOP) of a particular OP, +and was unable to do so. This is likely to be due to a bug in the perl +internals, or due to a bug in XS code which manipulates perl optrees. + =item Can't do inplace edit: %s is not a regular file (S inplace) You tried to use the B<-i> switch on a special file, such as @@ -1539,7 +1546,7 @@ Perhaps you need to copy the value to a temporary, and repeat that. Note that ASCII characters that don't map to control characters are discouraged, and will generate the warning (when enabled) -L. +L. =item Character following \%c must be '{' or a single-character Unicode property name in regex; marked by <-- HERE in m/%s/ @@ -1646,15 +1653,13 @@ See L. (W unopened) You tried chdir() on a filehandle that was never opened. -=item "\c%c" is more clearly written simply as "%s". This will be a fatal error in Perl 5.28 +=item "\c%c" is more clearly written simply as "%s" -(D deprecated, syntax) The C<\cI> construct is intended to be a -way to specify non-printable characters. You used it for a printable -one, which is better written as simply itself, perhaps preceded by -a backslash for non-word characters. Doing it the way you did is -not portable between ASCII and EBCDIC platforms. - -This usage is going to result in a fatal error in Perl 5.28. +(W syntax) The C<\cI> construct is intended to be a way to specify +non-printable characters. You used it for a printable one, which +is better written as simply itself, perhaps preceded by a backslash +for non-word characters. Doing it the way you did is not portable +between ASCII and EBCDIC platforms. =item Cloning substitution context is unimplemented @@ -2188,7 +2193,7 @@ variable and glob that. (F) The C function is not implemented on some systems, e.g., Symbian OS. See L. -=item Execution of %s aborted due to compilation errors. +=item %sExecution of %s aborted due to compilation errors. (F) The final summary message when a Perl compilation fails. @@ -3133,13 +3138,13 @@ code. In Perl 5.30, it will no longer be possible to use sysread(), recv(), syswrite() or send() to read or send bytes from/to :utf8 handles. -=item "%s" is more clearly written simply as "%s" in regex. This will be a fatal error in Perl 5.28; marked by S<<-- HERE> in m/%s/ +=item "%s" is more clearly written simply as "%s" in regex; marked by S<<-- HERE> in m/%s/ -(W deprecated, regexp) (only under C> or within C<(?[...])>) +(W regexp) (only under C> or within C<(?[...])>) You specified a character that has the given plainer way of writing it, and which is also portable to platforms running with different character -sets. This usage is deprecated, and will be a fatal error in Perl 5.28. +sets. =item $* is no longer supported. Its use will be fatal in Perl 5.30 @@ -3400,7 +3405,7 @@ the variable, C<%s>, part of the message. One possible cause is that you set the UTF8 flag yourself for data that you thought to be in UTF-8 but it wasn't (it was for example legacy -8-bit data). To guard against this, you can use Encode::decode_utf8. +8-bit data). To guard against this, you can use C. If you use the C<:encoding(UTF-8)> PerlIO layer for input, invalid byte sequences are handled gracefully, but if you use C<:utf8>, the flag is @@ -3409,14 +3414,6 @@ message. See also L. -=item Malformed UTF-8 character immediately after '%s' - -(F) You said C, but the program file doesn't comply with UTF-8 -encoding rules. The message prints out the properly encoded characters -just before the first bad one. If C warnings are enabled, a -warning is generated that gives more details about the type of -malformation. - =item Malformed UTF-8 returned by \N{%s} immediately after '%s' (F) The charnames handler returned malformed UTF-8. @@ -4235,14 +4232,16 @@ the braces. (4294967295) and therefore non-portable between systems. See L for more on portability concerns. -=item Odd name/value argument for subroutine +=item Odd name/value argument for subroutine '%s' (F) A subroutine using a slurpy hash parameter in its signature received an odd number of arguments to populate the hash. It requires the arguments to be paired, with the same number of keys as values. -The caller of the subroutine is presumably at fault. Inconveniently, -this error will be reported at the location of the subroutine, not that -of the caller. +The caller of the subroutine is presumably at fault. + +The message attempts to include the name of the called subroutine. If the +subroutine has been aliased, the subroutine's original name will be shown, +regardless of what name the caller used. =item Odd number of arguments for overload::constant @@ -6086,12 +6085,16 @@ See L. (F) There has to be at least one argument to syscall() to specify the system call to call, silly dilly. -=item Too few arguments for subroutine +=item Too few arguments for subroutine '%s' (F) A subroutine using a signature received too few arguments than required by the signature. The caller of the subroutine is presumably at fault. +The message attempts to include the name of the called subroutine. If the +subroutine has been aliased, the subroutine's original name will be shown, +regardless of what name the caller used. + =item Too late for "-%s" option (X) The #! line (or local equivalent) in a Perl script contains the @@ -6122,12 +6125,15 @@ BEGIN block. (F) The function requires fewer arguments than you specified. -=item Too many arguments for subroutine +=item Too many arguments for subroutine '%s' (F) A subroutine using a signature received too many arguments than required by the signature. The caller of the subroutine is presumably at fault. +The message attempts to include the name of the called subroutine. If the +subroutine has been aliased, the subroutine's original name will be shown, +regardless of what name the caller used. =item Too many )'s @@ -7105,7 +7111,7 @@ but will become a fatal error in a future version of perl. Untaint your arguments. See L. =item Use of unassigned code point or non-standalone grapheme for a -delimiter will be a fatal error starting in Perl v5.30 +delimiter will be a fatal error starting in Perl 5.30 (D deprecated) A grapheme is what appears to a native-speaker of a language to be a diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 1e32cca..10651b4 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -2035,86 +2035,187 @@ X X =for Pod::Functions catch exceptions or compile and run code -In the first form, often referred to as a "string eval", the return -value of EXPR is parsed and executed as if it -were a little Perl program. The value of the expression (which is itself -determined within scalar context) is first parsed, and if there were no -errors, executed as a block within the lexical context of the current Perl -program. This means, that in particular, any outer lexical variables are -visible to it, and any package variable settings or subroutine and format -definitions remain afterwards. - -Note that the value is parsed every time the L|/eval EXPR> -executes. If EXPR is omitted, evaluates L|perlvar/$_>. This form -is typically used to delay parsing and subsequent execution of the text -of EXPR until run time. - -If the -L feature|feature/The 'unicode_eval' and 'evalbytes' features> -is enabled (which is the default under a -C or higher declaration), EXPR or L|perlvar/$_> is -treated as a string of characters, so L|utf8> declarations -have no effect, and source filters are forbidden. In the absence of the -L feature|feature/The 'unicode_eval' and 'evalbytes' features>, -will sometimes be treated as characters and sometimes as bytes, -depending on the internal encoding, and source filters activated within -the L|/eval EXPR> exhibit the erratic, but historical, behaviour -of affecting some outer file scope that is still compiling. See also -the L|/evalbytes EXPR> operator, which always treats its -input as a byte stream and works properly with source filters, and the -L pragma. - -Problems can arise if the string expands a scalar containing a floating -point number. That scalar can expand to letters, such as C<"NaN"> or -C<"Infinity">; or, within the scope of a L|locale>, the -decimal point character may be something other than a dot (such as a -comma). None of these are likely to parse as you are likely expecting. - -In the second form, the code within the BLOCK is parsed only once--at the -same time the code surrounding the L|/eval EXPR> itself was -parsed--and executed +C in all its forms is used to execute a little Perl program, +trapping any errors encountered so they don't crash the calling program. + +Plain C with no argument is just C, where the +expression is understood to be contained in L|perlvar/$_>. Thus +there are only two real C forms; the one with an EXPR is often +called "string eval". In a string eval, the value of the expression +(which is itself determined within scalar context) is first parsed, and +if there were no errors, executed as a block within the lexical context +of the current Perl program. This form is typically used to delay +parsing and subsequent execution of the text of EXPR until run time. +Note that the value is parsed every time the C executes. + +The other form is called "block eval". It is less general than string +eval, but the code within the BLOCK is parsed only once (at the same +time the code surrounding the C itself was parsed) and executed within the context of the current Perl program. This form is typically -used to trap exceptions more efficiently than the first (see below), while -also providing the benefit of checking the code within BLOCK at compile -time. - -The final semicolon, if any, may be omitted from the value of EXPR or within -the BLOCK. +used to trap exceptions more efficiently than the first, while also +providing the benefit of checking the code within BLOCK at compile time. +BLOCK is parsed and compiled just once. Since errors are trapped, it +often is used to check if a given feature is available. In both forms, the value returned is the value of the last expression -evaluated inside the mini-program; a return statement may be also used, just +evaluated inside the mini-program; a return statement may also be used, just as with subroutines. The expression providing the return value is evaluated in void, scalar, or list context, depending on the context of the -L|/eval EXPR> itself. See L|/wantarray> for more +C itself. See L|/wantarray> for more on how the evaluation context can be determined. If there is a syntax error or runtime error, or a L|/die LIST> -statement is executed, L|/eval EXPR> returns -L|/undef EXPR> in scalar context or an empty list in list +statement is executed, C returns +L|/undef EXPR> in scalar context, or an empty list in list context, and L|perlvar/$@> is set to the error message. (Prior to 5.16, a bug caused L|/undef EXPR> to be returned in list context for syntax errors, but not for runtime errors.) If there was no error, L|perlvar/$@> is set to the empty string. A control flow operator like L|/last LABEL> or L|/goto LABEL> can bypass the setting of L|perlvar/$@>. Beware that using -L|/eval EXPR> neither silences Perl from printing warnings to +C neither silences Perl from printing warnings to STDERR, nor does it stuff the text of warning messages into L|perlvar/$@>. To do either of those, you have to use the L|perlvar/%SIG> facility, or turn off warnings inside the BLOCK or EXPR using S>. See L|/warn LIST>, L, and L. -Note that, because L|/eval EXPR> traps otherwise-fatal errors, +Note that, because C traps otherwise-fatal errors, it is useful for determining whether a particular feature (such as L|/socket SOCKET,DOMAIN,TYPE,PROTOCOL> or L|/symlink OLDFILE,NEWFILE>) is implemented. It is also Perl's exception-trapping mechanism, where the L|/die LIST> operator is used to raise exceptions. -If you want to trap errors when loading an XS module, some problems with -the binary interface (such as Perl version skew) may be fatal even with -L|/eval EXPR> unless C<$ENV{PERL_DL_NONLAZY}> is set. See -L. +Before Perl 5.14, the assignment to L|perlvar/$@> occurred before +restoration +of localized variables, which means that for your code to run on older +versions, a temporary is required if you want to mask some, but not all +errors: + + # alter $@ on nefarious repugnancy only + { + my $e; + { + local $@; # protect existing $@ + eval { test_repugnancy() }; + # $@ =~ /nefarious/ and die $@; # Perl 5.14 and higher only + $@ =~ /nefarious/ and $e = $@; + } + die $e if defined $e + } + +There are some different considerations for each form: + +=over 4 + +=item String eval + +Since the return value of EXPR is executed as a block within the lexical +context of the current Perl program, any outer lexical variables are +visible to it, and any package variable settings or subroutine and +format definitions remain afterwards. + +=over 4 + +=item Under the L feature|feature/The 'unicode_eval' and 'evalbytes' features> + +If this feature is enabled (which is the default under a C or +higher declaration), EXPR is considered to be +in the same encoding as the surrounding program. Thus if +S|utf8>> is in effect, the string will be treated as being +UTF-8 encoded. Otherwise, the string is considered to be a sequence of +independent bytes. Bytes that correspond to ASCII-range code points +will have their normal meanings for operators in the string. The +treatment of the other bytes depends on if the +L feature|feature/The 'unicode_strings' feature> is +in effect. + +In a plain C without an EXPR argument, being in S> or +not is irrelevant; the UTF-8ness of C<$_> itself determines the +behavior. + +Any S> or S> declarations within the string have +no effect, and source filters are forbidden. (C, +however, can appear within the string. See also the +L|/evalbytes EXPR> operator, which works properly with +source filters. + +Variables defined outside the C and used inside it retain their +original UTF-8ness. Everything inside the string follows the normal +rules for a Perl program with the given state of S>. + +=item Outside the C<"unicode_eval"> feature + +In this case, the behavior is problematic and is not so easily +described. Here are two bugs that cannot easily be fixed without +breaking existing programs: + +=over 4 + +=item * + +It can lose track of whether something should be encoded as UTF-8 or +not. + +=item * + +Source filters activated within C leak out into whichever file +scope is currently being compiled. To give an example with the CPAN module +L: + + BEGIN { eval "use Semi::Semicolons; # not filtered" } + # filtered here! + +L|/evalbytes EXPR> fixes that to work the way one would +expect: + + use feature "evalbytes"; + BEGIN { evalbytes "use Semi::Semicolons; # filtered" } + # not filtered + +=back + +=back + +Problems can arise if the string expands a scalar containing a floating +point number. That scalar can expand to letters, such as C<"NaN"> or +C<"Infinity">; or, within the scope of a L|locale>, the +decimal point character may be something other than a dot (such as a +comma). None of these are likely to parse as you are likely expecting. + +You should be especially careful to remember what's being looked at +when: + + eval $x; # CASE 1 + eval "$x"; # CASE 2 + + eval '$x'; # CASE 3 + eval { $x }; # CASE 4 + + eval "\$$x++"; # CASE 5 + $$x++; # CASE 6 + +Cases 1 and 2 above behave identically: they run the code contained in +the variable $x. (Although case 2 has misleading double quotes making +the reader wonder what else might be happening (nothing is).) Cases 3 +and 4 likewise behave in the same way: they run the code C<'$x'>, which +does nothing but return the value of $x. (Case 4 is preferred for +purely visual reasons, but it also has the advantage of compiling at +compile-time instead of at run-time.) Case 5 is a place where +normally you I like to use double quotes, except that in this +particular situation, you can just use symbolic references instead, as +in case 6. + +An C executed within a subroutine defined +in the C package doesn't see the usual +surrounding lexical scope, but rather the scope of the first non-DB piece +of code that called it. You don't normally need to worry about this unless +you are writing a Perl debugger. + +The final semicolon, if any, may be omitted from the value of EXPR. + +=item Block eval If the code to be executed doesn't vary, you may use the eval-BLOCK form to trap run-time errors without incurring the penalty of @@ -2134,6 +2235,11 @@ Examples: # a run-time error eval '$answer ='; # sets $@ +If you want to trap errors when loading an XS module, some problems with +the binary interface (such as Perl version skew) may be fatal even with +C unless C<$ENV{PERL_DL_NONLAZY}> is set. See +L. + Using the C form as an exception trap in libraries does have some issues. Due to the current arguably broken state of C<__DIE__> hooks, you may wish not to trigger any C<__DIE__> hooks that user code may have installed. @@ -2159,56 +2265,13 @@ messages: Because this promotes action at a distance, this counterintuitive behavior may be fixed in a future release. -With an L|/eval EXPR>, you should be especially careful to -remember what's being looked at when: - - eval $x; # CASE 1 - eval "$x"; # CASE 2 - - eval '$x'; # CASE 3 - eval { $x }; # CASE 4 - - eval "\$$x++"; # CASE 5 - $$x++; # CASE 6 - -Cases 1 and 2 above behave identically: they run the code contained in -the variable $x. (Although case 2 has misleading double quotes making -the reader wonder what else might be happening (nothing is).) Cases 3 -and 4 likewise behave in the same way: they run the code C<'$x'>, which -does nothing but return the value of $x. (Case 4 is preferred for -purely visual reasons, but it also has the advantage of compiling at -compile-time instead of at run-time.) Case 5 is a place where -normally you I like to use double quotes, except that in this -particular situation, you can just use symbolic references instead, as -in case 6. - -Before Perl 5.14, the assignment to L|perlvar/$@> occurred before -restoration -of localized variables, which means that for your code to run on older -versions, a temporary is required if you want to mask some but not all -errors: - - # alter $@ on nefarious repugnancy only - { - my $e; - { - local $@; # protect existing $@ - eval { test_repugnancy() }; - # $@ =~ /nefarious/ and die $@; # Perl 5.14 and higher only - $@ =~ /nefarious/ and $e = $@; - } - die $e if defined $e - } - C does I count as a loop, so the loop control statements L|/next LABEL>, L|/last LABEL>, or L|/redo LABEL> cannot be used to leave or restart the block. -An C executed within a subroutine defined -in the C package doesn't see the usual -surrounding lexical scope, but rather the scope of the first non-DB piece -of code that called it. You don't normally need to worry about this unless -you are writing a Perl debugger. +The final semicolon, if any, may be omitted from within the BLOCK. + +=back =item evalbytes EXPR X @@ -2217,18 +2280,42 @@ X =for Pod::Functions +evalbytes similar to string eval, but intend to parse a bytestream -This function is like L|/eval EXPR> with a string argument, -except it always parses its argument, or L|perlvar/$_> if EXPR is -omitted, as a string of bytes. A string containing characters whose -ordinal value exceeds 255 results in an error. Source filters activated -within the evaluated code apply to the code itself. +This function is similar to a L, except it +always parses its argument (or L|perlvar/$_> if EXPR is omitted) +as a string of independent bytes. -L|/evalbytes EXPR> is available only if the -L feature|feature/The 'unicode_eval' and 'evalbytes' features> -is enabled or if it is prefixed with C. The +If called when S> is in effect, the string will be assumed +to be encoded in UTF-8, and C will make a temporary +downgraded to non-UTF-8 copy to work from. If this is not possible +(because one or more characters in it require UTF-8), the C +will fail with the error stored in C<$@>. + +Bytes that correspond to ASCII-range code points will have their normal +meanings for operators in the string. The treatment of the other bytes +depends on if the L feature|feature/The +'unicode_strings' feature> is in effect. + +Of course, variables that are UTF-8 and are referred to in the string +retain that: + + my $a = "\x{100}"; + evalbytes 'print ord $a, "\n"'; + +prints + + 256 + +and C<$@> is empty. + +Source filters activated within the evaluated code apply to the code +itself. + +L|/evalbytes EXPR> is available starting in Perl v5.16. To +access it, you must say C, but you can omit the +C if the L feature|feature/The 'unicode_eval' and 'evalbytes' features> -is enabled automatically with a C (or higher) declaration in -the current scope. +is enabled. This is enabled automatically with a C (or +higher) declaration in the current scope. =item exec LIST X X @@ -3763,8 +3850,8 @@ many elements these have. For that, use C and C|/length EXPR> normally deals in logical characters, not physical bytes. For how many bytes a string encoded as -UTF-8 would take up, use C (you'll have -to C first). See L and L. +UTF-8 would take up, use C +(you'll have to C first). See L and L. =item __LINE__ X<__LINE__> @@ -6090,7 +6177,7 @@ Note the I: depending on the status of the socket, either (8-bit) bytes or characters are received. By default all sockets operate on bytes, but for example if the socket has been changed using L|/binmode FILEHANDLE, LAYER> to operate with the -C<:encoding(utf8)> I/O layer (see the L pragma), the I/O will +C<:encoding(UTF-8)> I/O layer (see the L pragma), the I/O will operate on UTF8-encoded Unicode characters, not bytes. Similarly for the C<:encoding> layer: in that case pretty much any characters can be read. @@ -6650,7 +6737,7 @@ of the file) from the L module. Returns C<1> on success, false otherwise. Note the emphasis on bytes: even if the filehandle has been set to operate -on characters (for example using the C<:encoding(utf8)> I/O layer), the +on characters (for example using the C<:encoding(UTF-8)> I/O layer), the L|/seek FILEHANDLE,POSITION,WHENCE>, L|/tell FILEHANDLE>, and L|/sysseek FILEHANDLE,POSITION,WHENCE> @@ -6889,7 +6976,7 @@ Note the I: depending on the status of the socket, either (8-bit) bytes or characters are sent. By default all sockets operate on bytes, but for example if the socket has been changed using L|/binmode FILEHANDLE, LAYER> to operate with the -C<:encoding(utf8)> I/O layer (see L|/open FILEHANDLE,EXPR>, or +C<:encoding(UTF-8)> I/O layer (see L|/open FILEHANDLE,EXPR>, or the L pragma), the I/O will operate on UTF-8 encoded Unicode characters, not bytes. Similarly for the C<:encoding> layer: in that case pretty much any characters can be sent. @@ -7167,9 +7254,7 @@ If the subroutine's prototype is C<($$)>, the elements to be compared are passed by reference in L|perlvar/@_>, as for a normal subroutine. This is slower than unprototyped subroutines, where the elements to be compared are passed into the subroutine as the package global variables -C<$a> and C<$b> (see example below). Note that in the latter case, it -is usually highly counter-productive to declare C<$a> and C<$b> as -lexicals. +C<$a> and C<$b> (see example below). If the subroutine is an XSUB, the elements to be compared are pushed on to the stack, the way arguments are usually passed to XSUBs. C<$a> and @@ -7314,16 +7399,63 @@ C then you can use: my @contact = sort(find_records @key); my @contact = sort(find_records (@key)); -You I declare C<$a> -and C<$b> as lexicals. They are package globals. That means -that if you're in the C
package and type - - my @articles = sort {$b <=> $a} @files; - -then C<$a> and C<$b> are C<$main::a> and C<$main::b> (or C<$::a> and C<$::b>), -but if you're in the C package, it's the same as typing - - my @articles = sort {$FooPack::b <=> $FooPack::a} @files; +C<$a> and C<$b> are set as package globals in the package the sort() is +called from. That means C<$main::a> and C<$main::b> (or C<$::a> and +C<$::b>) in the C
package, C<$FooPack::a> and C<$FooPack::b> in the +C package, etc. If the sort block is in scope of a C or +C declaration of C<$a> and/or C<$b>, you I spell out the full +name of the variables in the sort block : + + package main; + my $a = "C"; # DANGER, Will Robinson, DANGER !!! + + print sort { $a cmp $b } qw(A C E G B D F H); + # WRONG + sub badlexi { $a cmp $b } + print sort badlexi qw(A C E G B D F H); + # WRONG + # the above prints BACFEDGH or some other incorrect ordering + + print sort { $::a cmp $::b } qw(A C E G B D F H); + # OK + print sort { our $a cmp our $b } qw(A C E G B D F H); + # also OK + print sort { our ($a, $b); $a cmp $b } qw(A C E G B D F H); + # also OK + sub lexi { our $a cmp our $b } + print sort lexi qw(A C E G B D F H); + # also OK + # the above print ABCDEFGH + +With proper care you may mix package and my (or state) C<$a> and/or C<$b>: + + my $a = { + tiny => -2, + small => -1, + normal => 0, + big => 1, + huge => 2 + }; + + say sort { $a->{our $a} <=> $a->{our $b} } + qw{ huge normal tiny small big}; + + # prints tinysmallnormalbighuge + +C<$a> and C<$b> are implicitely local to the sort() execution and regain their +former values upon completing the sort. + +Sort subroutines written using C<$a> and C<$b> are bound to their calling +package. It is possible, but of limited interest, to define them in a +different package, since the subroutine must still refer to the calling +package's C<$a> and C<$b> : + + package Foo; + sub lexi { $Bar::a cmp $Bar::b } + package Bar; + ... sort Foo::lexi ... + +Use the prototyped versions (see above) for a more generic alternative. The comparison function is required to behave. If it returns inconsistent results (sometimes saying C<$x[1]> is less than C<$x[2]> and @@ -8490,7 +8622,7 @@ to the current position plus POSITION; and C<2> to set it to EOF plus POSITION, typically negative. Note the emphasis on bytes: even if the filehandle has been set to operate -on characters (for example using the C<:encoding(utf8)> I/O layer), the +on characters (for example using the C<:encoding(UTF-8)> I/O layer), the L|/seek FILEHANDLE,POSITION,WHENCE>, L|/tell FILEHANDLE>, and L|/sysseek FILEHANDLE,POSITION,WHENCE> @@ -8657,7 +8789,7 @@ the actual filehandle. If FILEHANDLE is omitted, assumes the file last read. Note the emphasis on bytes: even if the filehandle has been set to operate -on characters (for example using the C<:encoding(utf8)> I/O layer), the +on characters (for example using the C<:encoding(UTF-8)> I/O layer), the L|/seek FILEHANDLE,POSITION,WHENCE>, L|/tell FILEHANDLE>, and L|/sysseek FILEHANDLE,POSITION,WHENCE> diff --git a/pod/perlhist.pod b/pod/perlhist.pod index 72efc7b..dfb3b2c 100644 --- a/pod/perlhist.pod +++ b/pod/perlhist.pod @@ -33,7 +33,7 @@ Matt S Trout, David Golden, Florian Ragwitz, Tatsuhiko Miyagawa, Chris C Williams, Zefram, Ævar Arnfjörð Bjarmason, Stevan Little, Dave Rolsky, Max Maischein, Abigail, Jesse Luehrs, Tony Cook, Dominic Hargreaves, Aaron Crane, Aristotle Pagaltzis, Matthew Horsfall, -Peter Martini, Sawyer X, and Chad 'Exodist' Granum. +Peter Martini, Sawyer X, Chad 'Exodist' Granum, and Renee Bäcker. =head2 PUMPKIN? @@ -608,6 +608,7 @@ the strings?). Chad 5.25.7 2016-Nov-20 Sawyer X 5.25.8 2016-Dec-20 Abigail 5.25.9 2017-Jan-20 + Renee 5.25.10 2017-Feb-20 =head2 SELECTED RELEASE SIZES diff --git a/pod/perlop.pod b/pod/perlop.pod index 6550133..7df98f7 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1268,16 +1268,19 @@ The only operators with lower precedence are the logical operators C<"and">, C<"or">, and C<"not">, which may be used to evaluate calls to list operators without the need for parentheses: - open HANDLE, "< :utf8", "filename" or die "Can't open: $!\n"; + open HANDLE, "< :encoding(UTF-8)", "filename" + or die "Can't open: $!\n"; However, some people find that code harder to read than writing it with parentheses: - open(HANDLE, "< :utf8", "filename") or die "Can't open: $!\n"; + open(HANDLE, "< :encoding(UTF-8)", "filename") + or die "Can't open: $!\n"; in which case you might as well just use the more customary C<"||"> operator: - open(HANDLE, "< :utf8", "filename") || die "Can't open: $!\n"; + open(HANDLE, "< :encoding(UTF-8)", "filename") + || die "Can't open: $!\n"; See also discussion of list operators in L. diff --git a/pod/perlpacktut.pod b/pod/perlpacktut.pod index f40d1c2..f6a9411 100644 --- a/pod/perlpacktut.pod +++ b/pod/perlpacktut.pod @@ -668,9 +668,10 @@ Usually you'll want to pack or unpack UTF-8 strings: my @hebrew = unpack( 'U*', $utf ); Please note: in the general case, you're better off using -Encode::decode_utf8 to decode a UTF-8 encoded byte string to a Perl -Unicode string, and Encode::encode_utf8 to encode a Perl Unicode string -to UTF-8 bytes. These functions provide means of handling invalid byte +L|Encode/decode> to decode a UTF-8 +encoded byte string to a Perl Unicode string, and +L|Encode/encode> to encode a Perl Unicode +string to UTF-8 bytes. These functions provide means of handling invalid byte sequences and generally have a friendlier interface. =head2 Another Portable Binary Encoding diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod index 1c07632..22f71ab 100644 --- a/pod/perlrecharclass.pod +++ b/pod/perlrecharclass.pod @@ -1088,11 +1088,14 @@ white space within it. This is allowed because Cxx> is automatically turned on within this construct. All the other escapes accepted by normal bracketed character classes are -accepted here as well; but unrecognized escapes that generate warnings -in normal classes are fatal errors here. - -All warnings from these class elements are fatal, as well as some -practices that don't currently warn. For example you cannot say +accepted here as well. + +Because this construct compiles under +L|re/'strict' mode>, unrecognized escapes that +generate warnings in normal classes are fatal errors here, as well as +all other warnings from these class elements, as well as some +practices that don't currently warn outside C. For example +you cannot say /(?[ [ \xF ] ])/ # Syntax error! diff --git a/pod/perlretut.pod b/pod/perlretut.pod index 9c7ab56..87ef42b 100644 --- a/pod/perlretut.pod +++ b/pod/perlretut.pod @@ -197,6 +197,14 @@ be backslashed: 'C:\WIN32' =~ /C:\\WIN/; # matches +In situations where it doesn't make sense for a particular metacharacter +to mean what it normally does, it automatically loses its +metacharacter-ness and becomes an ordinary character that is to be +matched literally. For example, the C<'}'> is a metacharacter only when +it is the mate of a C<'{'> metacharacter. Otherwise it is treated as a +literal RIGHT CURLY BRACKET. This may lead to unexpected results. +L|re/'strict' mode> can catch some of these. + In addition to the metacharacters, there are some ASCII characters which don't have printable character equivalents and are instead represented by I. Common examples are C<\t> for a diff --git a/pod/perlrun.pod b/pod/perlrun.pod index 9d59a6a..b4bb5a3 100644 --- a/pod/perlrun.pod +++ b/pod/perlrun.pod @@ -1121,7 +1121,7 @@ A pseudolayer that enables a flag in the layer below to tell Perl that output should be in utf8 and that input should be regarded as already in valid utf8 form. B Generally C<:encoding(utf8)> is +can occur with non-shortest UTF-8 encodings, etc.> Generally C<:encoding(UTF-8)> is the best option when reading UTF-8 encoded data. =item :win32 diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index 33e52b3..23818a1 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -1889,7 +1889,7 @@ work under 5.6, so you should be safe to try them out. A filehandle that should read or write UTF-8 if ($] > 5.008) { - binmode $fh, ":encoding(utf8)"; + binmode $fh, ":encoding(UTF-8)"; } =item * @@ -1904,7 +1904,7 @@ check the documentation to verify if this is still true. if ($] > 5.008) { require Encode; - $val = Encode::encode_utf8($val); # make octets + $val = Encode::encode("UTF-8", $val); # make octets } =item * @@ -1916,7 +1916,7 @@ want the UTF8 flag restored: if ($] > 5.008) { require Encode; - $val = Encode::decode_utf8($val); + $val = Encode::decode("UTF-8", $val); } =item * @@ -2017,8 +2017,8 @@ Perl's internal representation like so: sub my_escape_html ($) { my($what) = shift; return unless defined $what; - Encode::decode_utf8(Foo::Bar::escape_html( - Encode::encode_utf8($what))); + Encode::decode("UTF-8", Foo::Bar::escape_html( + Encode::encode("UTF-8", $what))); } Sometimes, when the extension does not convert data but just stores diff --git a/pod/perlunicook.pod b/pod/perlunicook.pod index ac30509..eb395f7 100644 --- a/pod/perlunicook.pod +++ b/pod/perlunicook.pod @@ -26,7 +26,7 @@ to work correctly, with the C<#!> adjusted to work on your system: use strict; # quote strings, declare variables use warnings; # on by default use warnings qw(FATAL utf8); # fatalize encoding glitches - use open qw(:std :utf8); # undeclared streams in UTF-8 + use open qw(:std :encoding(UTF-8)); # undeclared streams in UTF-8 use charnames qw(:full :short); # unneeded in v5.16 This I make even Unix programmers C your binary streams, @@ -234,8 +234,8 @@ C as described later below. or $ export PERL_UNICODE=A or - use Encode qw(decode_utf8); - @ARGV = map { decode_utf8($_, 1) } @ARGV; + use Encode qw(decode); + @ARGV = map { decode('UTF-8', $_, 1) } @ARGV; =head2 ℞ 14: Decode program arguments as locale encoding @@ -255,9 +255,9 @@ call C explicitly: or $ export PERL_UNICODE=S or - use open qw(:std :utf8); + use open qw(:std :encoding(UTF-8)); or - binmode(STDIN, ":utf8"); + binmode(STDIN, ":encoding(UTF-8)"); binmode(STDOUT, ":utf8"); binmode(STDERR, ":utf8"); @@ -280,7 +280,7 @@ Files opened without an encoding argument will be in UTF-8: or $ export PERL_UNICODE=D or - use open qw(:utf8); + use open qw(:encoding(UTF-8)); =head2 ℞ 18: Make all I/O and args default to utf8 @@ -288,9 +288,9 @@ Files opened without an encoding argument will be in UTF-8: or $ export PERL_UNICODE=SDA or - use open qw(:std :utf8); - use Encode qw(decode_utf8); - @ARGV = map { decode_utf8($_, 1) } @ARGV; + use open qw(:std :encoding(UTF-8)); + use Encode qw(decode); + @ARGV = map { decode('UTF-8', $_, 1) } @ARGV; =head2 ℞ 19: Open file with specific encoding @@ -701,7 +701,7 @@ Here's that program; tested on v5.14. use strict; use warnings; use warnings qw(FATAL utf8); # fatalize encoding faults - use open qw(:std :utf8); # undeclared streams in UTF-8 + use open qw(:std :encoding(UTF-8)); # undeclared streams in UTF-8 use charnames qw(:full :short); # unneeded in v5.16 # std modules diff --git a/pod/perlunifaq.pod b/pod/perlunifaq.pod index 4135fba..ba391d4 100644 --- a/pod/perlunifaq.pod +++ b/pod/perlunifaq.pod @@ -199,7 +199,9 @@ or by letting automatic decoding and encoding do all the work: =head2 What are C and C? These are alternate syntaxes for C and C. +...)>. Do not use these functions for data exchange. Instead use +C and C; see +L below. =head2 What is a "wide character"? @@ -283,7 +285,7 @@ C is the official standard. C is Perl's way of being liberal in what it accepts. If you have to communicate with things that aren't so liberal, you may want to consider using C. If you have to communicate with things that are too liberal, you may have to use C. The full explanation is in -L. +L. C is internally known as C. The tutorial uses UTF-8 consistently, even where utf8 is actually used internally, because the diff --git a/pod/perluniintro.pod b/pod/perluniintro.pod index cd62d4c..d35de34 100644 --- a/pod/perluniintro.pod +++ b/pod/perluniintro.pod @@ -358,7 +358,7 @@ The C module knows about many encodings and has interfaces for doing conversions between those encodings: use Encode 'decode'; - $data = decode("iso-8859-3", $data); # convert from legacy to utf-8 + $data = decode("iso-8859-3", $data); # convert from legacy =head2 Unicode I/O @@ -393,7 +393,7 @@ many encodings have several aliases. Note that the C<:utf8> layer must always be specified exactly like that; it is I subject to the loose matching of encoding names. Also note that currently C<:utf8> is unsafe for input, because it accepts the data without validating that it is indeed valid -UTF-8; you should instead use C<:encoding(utf-8)> (with or without a +UTF-8; you should instead use C<:encoding(UTF-8)> (with or without a hyphen). See L for the C<:utf8> layer, L and @@ -406,7 +406,7 @@ Unicode or legacy encodings does not magically turn the data into Unicode in Perl's eyes. To do that, specify the appropriate layer when opening files - open(my $fh,'<:encoding(utf8)', 'anything'); + open(my $fh,'<:encoding(UTF-8)', 'anything'); my $line_of_unicode = <$fh>; open(my $fh,'<:encoding(Big5)', 'anything'); @@ -415,8 +415,8 @@ layer when opening files The I/O layers can also be specified more flexibly with the C pragma. See L, or look at the following example. - use open ':encoding(utf8)'; # input/output default encoding will be - # UTF-8 + use open ':encoding(UTF-8)'; # input/output default encoding will be + # UTF-8 open X, ">file"; print X chr(0x100), "\n"; close X; @@ -485,12 +485,12 @@ by repeatedly encoding the data: local $/; ## read in the whole file of 8-bit characters $t = ; close F; - open F, ">:encoding(utf8)", "file"; + open F, ">:encoding(UTF-8)", "file"; print F $t; ## convert to UTF-8 on output close F; If you run this code twice, the contents of the F will be twice -UTF-8 encoded. A C would have avoided the +UTF-8 encoded. A C would have avoided the bug, or explicitly opening also the F for input as UTF-8. B: the C<:utf8> and C<:encoding> features work only if your @@ -729,16 +729,13 @@ the output string will be UTF-8-encoded C, but C<$a> will stay byte-encoded. Sometimes you might really need to know the byte length of a string -instead of the character length. For that use either the -C function or the C pragma +instead of the character length. For that use the C pragma and the C function: my $unicode = chr(0x100); print length($unicode), "\n"; # will print 1 - require Encode; - print length(Encode::encode_utf8($unicode)),"\n"; # will print 2 use bytes; - print length($unicode), "\n"; # will also print 2 + print length($unicode), "\n"; # will print 2 # (the 0xC4 0x80 of the UTF-8) no bytes; @@ -755,12 +752,12 @@ How Do I Detect Data That's Not Valid In a Particular Encoding? Use the C package to try converting it. For example, - use Encode 'decode_utf8'; + use Encode 'decode'; - if (eval { decode_utf8($string, Encode::FB_CROAK); 1 }) { - # $string is valid utf8 + if (eval { decode('UTF-8', $string, Encode::FB_CROAK); 1 }) { + # $string is valid UTF-8 } else { - # $string is not valid utf8 + # $string is not valid UTF-8 } Or use C to try decoding it: @@ -791,7 +788,7 @@ If you have a raw sequence of bytes that you know should be interpreted via a particular encoding, you can use C: use Encode 'from_to'; - from_to($data, "iso-8859-1", "utf-8"); # from latin-1 to utf-8 + from_to($data, "iso-8859-1", "UTF-8"); # from latin-1 to UTF-8 The call to C changes the bytes in C<$data>, but nothing material about the nature of the string has changed as far as Perl is @@ -820,8 +817,8 @@ pack/unpack to convert to/from Unicode. If you have a sequence of bytes you B is valid UTF-8, but Perl doesn't know it yet, you can make Perl a believer, too: - use Encode 'decode_utf8'; - $Unicode = decode_utf8($bytes); + $Unicode = $bytes; + utf8::decode($Unicode); or: diff --git a/pp.c b/pp.c index 657abf7..62316fc 100644 --- a/pp.c +++ b/pp.c @@ -3626,7 +3626,7 @@ PP(pp_ord) const U8 *s = (U8*)SvPV_const(argsv, len); SETu(DO_UTF8(argsv) - ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV) + ? (len ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV) : 0) : (UV)(*s)); return NORMAL; @@ -5652,8 +5652,6 @@ PP(pp_reverse) } else { char *up; - char *down; - I32 tmp; dTARGET; STRLEN len; @@ -5666,6 +5664,7 @@ PP(pp_reverse) up = SvPV_force(TARG, len); if (len > 1) { + char *down; if (DO_UTF8(TARG)) { /* first reverse each character */ U8* s = (U8*)SvPVX(TARG); const U8* send = (U8*)(s + len); @@ -5682,9 +5681,9 @@ PP(pp_reverse) down = (char*)(s - 1); /* reverse this character */ while (down > up) { - tmp = *up; + const char tmp = *up; *up++ = *down; - *down-- = (char)tmp; + *down-- = tmp; } } } @@ -5692,9 +5691,9 @@ PP(pp_reverse) } down = SvPVX(TARG) + len - 1; while (down > up) { - tmp = *up; + const char tmp = *up; *up++ = *down; - *down-- = (char)tmp; + *down-- = tmp; } (void)SvPOK_only_UTF8(TARG); } @@ -6801,6 +6800,26 @@ PP(pp_argdefelem) } +static SV * +S_find_runcv_name(void) +{ + dTHX; + CV *cv; + GV *gv; + SV *sv; + + cv = find_runcv(0); + if (!cv) + return &PL_sv_no; + + gv = CvGV(cv); + if (!gv) + return &PL_sv_no; + + sv = sv_2mortal(newSV(0)); + gv_fullname4(sv, gv, NULL, TRUE); + return sv; +} /* Check a a subs arguments - i.e. that it has the correct number of args * (and anything else we might think of in future). Typically used with @@ -6823,14 +6842,15 @@ PP(pp_argcheck) too_few = (argc < (params - opt_params)); if (UNLIKELY(too_few || (!slurpy && argc > params))) - /* diag_listed_as: Too few arguments for subroutine */ - /* diag_listed_as: Too many arguments for subroutine */ - Perl_croak_caller("Too %s arguments for subroutine", - too_few ? "few" : "many"); + /* diag_listed_as: Too few arguments for subroutine '%s' */ + /* diag_listed_as: Too many arguments for subroutine '%s' */ + Perl_croak_caller("Too %s arguments for subroutine '%" SVf "'", + too_few ? "few" : "many", S_find_runcv_name()); if (UNLIKELY(slurpy == '%' && argc > params && (argc - params) % 2)) - Perl_croak_caller("Odd name/value argument for subroutine"); - + /* diag_listed_as: Odd name/value argument for subroutine '%s' */ + Perl_croak_caller("Odd name/value argument for subroutine '%" SVf "'", + S_find_runcv_name()); return NORMAL; } diff --git a/pp_ctl.c b/pp_ctl.c index 2ced82d..ef5b122 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -505,6 +505,8 @@ PP(pp_formline) SvTAINTED_on(PL_formtarget); if (DO_UTF8(PL_formtarget)) targ_is_utf8 = TRUE; + /* this is an initial estimate of how much output buffer space + * to allocate. It may be exceeded later */ linemax = (SvCUR(formsv) * (IN_BYTES ? 1 : 3) + 1); t = SvGROW(PL_formtarget, len + linemax + 1); /* XXX from now onwards, SvCUR(PL_formtarget) is invalid */ @@ -766,6 +768,7 @@ PP(pp_formline) if (targ_is_utf8 && !item_is_utf8) { source = tmp = bytes_to_utf8(source, &to_copy); + grow = to_copy; } else { if (item_is_utf8 && !targ_is_utf8) { U8 *s; @@ -2946,6 +2949,7 @@ PP(pp_goto) OP *gotoprobe = NULL; bool leaving_eval = FALSE; bool in_block = FALSE; + bool pseudo_block = FALSE; PERL_CONTEXT *last_eval_cx = NULL; /* find label */ @@ -2984,11 +2988,9 @@ PP(pp_goto) gotoprobe = PL_main_root; break; case CXt_SUB: - if (CvDEPTH(cx->blk_sub.cv) && !CxMULTICALL(cx)) { - gotoprobe = CvROOT(cx->blk_sub.cv); - break; - } - /* FALLTHROUGH */ + gotoprobe = CvROOT(cx->blk_sub.cv); + pseudo_block = cBOOL(CxMULTICALL(cx)); + break; case CXt_FORMAT: case CXt_NULL: DIE(aTHX_ "Can't \"goto\" out of a pseudo block"); @@ -3017,6 +3019,8 @@ PP(pp_goto) break; } } + if (pseudo_block) + DIE(aTHX_ "Can't \"goto\" out of a pseudo block"); PL_lastgotoprobe = gotoprobe; } if (!retop) @@ -5171,7 +5175,7 @@ S_doparseform(pTHX_ SV *sv) SV *old = mg->mg_obj; if ( !(!!SvUTF8(old) ^ !!SvUTF8(sv)) && len == SvCUR(old) - && strnEQ(SvPVX(old), SvPVX(sv), len) + && strnEQ(SvPVX(old), s, len) ) { DEBUG_f(PerlIO_printf(Perl_debug_log,"Re-using compiled format\n")); return mg; diff --git a/pp_hot.c b/pp_hot.c index aeaecfc..58bbe2f 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -360,7 +360,6 @@ PP(pp_padrange) dSP; PADOFFSET base = PL_op->op_targ; int count = (int)(PL_op->op_private) & OPpPADRANGE_COUNTMASK; - int i; if (PL_op->op_flags & OPf_SPECIAL) { /* fake the RHS of my ($x,$y,..) = @_ */ PUSHMARK(SP); @@ -370,6 +369,8 @@ PP(pp_padrange) /* note, this is only skipped for compile-time-known void cxt */ if ((PL_op->op_flags & OPf_WANT) != OPf_WANT_VOID) { + int i; + EXTEND(SP, count); PUSHMARK(SP); for (i = 0; i > (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT)) == (Size_t)base); @@ -1039,7 +1042,7 @@ PP(pp_rv2av) || ( PL_op->op_private & OPpMAYBE_TRUEBOOL && block_gimme() == G_VOID )) && (!SvRMAGICAL(sv) || !mg_find(sv, PERL_MAGIC_tied))) - SETs(HvUSEDKEYS(sv) ? &PL_sv_yes : &PL_sv_no); + SETs(HvUSEDKEYS(MUTABLE_HV(sv)) ? &PL_sv_yes : &PL_sv_no); else if (gimme == G_SCALAR) { dTARG; TARG = Perl_hv_scalar(aTHX_ MUTABLE_HV(sv)); @@ -1182,6 +1185,7 @@ S_aassign_copy_common(pTHX_ SV **firstlelem, SV **lastlelem, assert(svr); if (UNLIKELY(SvFLAGS(svr) & (SVf_BREAK|SVs_GMG) || copy_all)) { + U32 brk = (SvFLAGS(svr) & SVf_BREAK); #ifdef DEBUGGING if (fake) { @@ -1217,7 +1221,7 @@ S_aassign_copy_common(pTHX_ SV **firstlelem, SV **lastlelem, /* ... but restore afterwards in case it's needed again, * e.g. ($a,$b,$c) = (1,$a,$a) */ - SvFLAGS(svr) |= SVf_BREAK; + SvFLAGS(svr) |= brk; } if (!lcount) diff --git a/pp_pack.c b/pp_pack.c index 737e019..86d138b 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -251,12 +251,15 @@ STATIC U8 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) { STRLEN retlen; - UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, + UV val; + + if (*s >= end) { + goto croak; + } + val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); - /* We try to process malformed UTF-8 as much as possible (preferably with - warnings), but these two mean we make no progress in the string and - might enter an infinite loop */ - if (retlen == (STRLEN) -1 || retlen == 0) + if (retlen == (STRLEN) -1) + croak: Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack", (int) TYPE_NO_MODIFIERS(datumtype)); if (val >= 0x100) { @@ -290,7 +293,7 @@ S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_ for (;buf_len > 0; buf_len--) { if (from >= end) return FALSE; val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags); - if (retlen == (STRLEN) -1 || retlen == 0) { + if (retlen == (STRLEN) -1) { from += UTF8SKIP(from); bad |= 1; } else from += retlen; @@ -396,7 +399,7 @@ STMT_START { \ STRLEN retlen; \ if (str >= end) break; \ val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \ - if (retlen == (STRLEN) -1 || retlen == 0) { \ + if (retlen == (STRLEN) -1) { \ *cur = '\0'; \ Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \ } \ @@ -1225,7 +1228,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c STRLEN retlen; aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); - if (retlen == (STRLEN) -1 || retlen == 0) + if (retlen == (STRLEN) -1) Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); s += retlen; } @@ -1248,7 +1251,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c STRLEN retlen; const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); - if (retlen == (STRLEN) -1 || retlen == 0) + if (retlen == (STRLEN) -1) Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); s += retlen; if (!checksum) @@ -1310,7 +1313,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c strend - s, &retlen, UTF8_ALLOW_DEFAULT)); - if (retlen == (STRLEN) -1 || retlen == 0) + if (retlen == (STRLEN) -1) Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); s += retlen; } @@ -2594,10 +2597,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) GROWING(0, cat, start, cur, len+UTF8_MAXLEN); end = start+SvLEN(cat)-UTF8_MAXLEN; } - cur = (char *) uvchr_to_utf8_flags((U8 *) cur, - auv, - warn_utf8 ? - 0 : UNICODE_ALLOW_ANY); + cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0); } else { if (auv >= 0x100) { if (!SvUTF8(cat)) { @@ -2648,9 +2648,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) auv = SvUV_no_inf(fromstr, datumtype); if (utf8) { U8 buffer[UTF8_MAXLEN], *endb; - endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), - warn_utf8 ? - 0 : UNICODE_ALLOW_ANY); + endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0); if (cur+(endb-buffer)*UTF8_EXPAND >= end) { *cur = '\0'; SvCUR_set(cat, cur - start); @@ -2666,9 +2664,9 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) GROWING(0, cat, start, cur, len+UTF8_MAXLEN); end = start+SvLEN(cat)-UTF8_MAXLEN; } - cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv), - warn_utf8 ? - 0 : UNICODE_ALLOW_ANY); + cur = (char *) uvchr_to_utf8_flags((U8 *) cur, + UNI_TO_NATIVE(auv), + 0); } } break; diff --git a/pp_sort.c b/pp_sort.c index 4ffe224..21e4574 100644 --- a/pp_sort.c +++ b/pp_sort.c @@ -1428,9 +1428,7 @@ S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp, U32 flags) =for apidoc sortsv -Sort an array. Here is an example: - - sortsv(AvARRAY(av), av_top_index(av)+1, Perl_sv_cmp_locale); +In-place sort an array of SV pointers with the given comparison routine. Currently this always uses mergesort. See C> for a more flexible routine. @@ -1449,7 +1447,8 @@ Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp) /* =for apidoc sortsv_flags -Sort an array, with various options. +In-place sort an array of SV pointers with the given comparison routine, +with various SORTf_* flag options. =cut */ diff --git a/pp_sys.c b/pp_sys.c index d8e9c30..7a57035 100644 --- a/pp_sys.c +++ b/pp_sys.c @@ -953,13 +953,26 @@ PP(pp_tie) */ stash = gv_stashsv(*MARK, 0); if (!stash) { - SV *stashname = SvOK(*MARK) ? *MARK : &PL_sv_no; - if (!SvCUR(*MARK)) { - stashname = sv_2mortal(newSVpvs("main")); + if (SvROK(*MARK)) + DIE(aTHX_ "Can't locate object method \"%s\" via package \"%" SVf "\"", + methname, SVfARG(*MARK)); + else if (isGV(*MARK)) { + /* If the glob doesn't name an existing package, using + * SVfARG(*MARK) would yield "*Foo::Bar" or *main::Foo. So + * generate the name for the error message explicitly. */ + SV *stashname = sv_2mortal(newSV(0)); + gv_fullname4(stashname, (GV *) *MARK, NULL, FALSE); + DIE(aTHX_ "Can't locate object method \"%s\" via package \"%" SVf "\"", + methname, SVfARG(stashname)); + } + else { + SV *stashname = !SvPOK(*MARK) ? &PL_sv_no + : SvCUR(*MARK) ? *MARK + : sv_2mortal(newSVpvs("main")); + DIE(aTHX_ "Can't locate object method \"%s\" via package \"%" SVf "\"" + " (perhaps you forgot to load \"%" SVf "\"?)", + methname, SVfARG(stashname), SVfARG(stashname)); } - DIE(aTHX_ "Can't locate object method \"%s\" via package \"%" SVf "\"" - " (perhaps you forgot to load \"%" SVf "\"?)", - methname, SVfARG(stashname), SVfARG(stashname)); } else if (!(gv = gv_fetchmethod(stash, methname))) { /* The effective name can only be NULL for stashes that have @@ -1419,7 +1432,6 @@ PP(pp_enterwrite) IO *io; GV *fgv; CV *cv = NULL; - SV *tmpsv = NULL; if (MAXARG == 0) { EXTEND(SP, 1); @@ -1443,7 +1455,7 @@ PP(pp_enterwrite) cv = GvFORM(fgv); if (!cv) { - tmpsv = sv_newmortal(); + SV * const tmpsv = sv_newmortal(); gv_efullname4(tmpsv, fgv, NULL, FALSE); DIE(aTHX_ "Undefined format \"%" SVf "\" called", SVfARG(tmpsv)); } @@ -4426,10 +4438,9 @@ PP(pp_system) if (did_pipes) { int errkid; unsigned n = 0; - SSize_t n1; while (n < sizeof(int)) { - n1 = PerlLIO_read(pp[0], + const SSize_t n1 = PerlLIO_read(pp[0], (void*)(((char*)&errkid)+n), (sizeof(int)) - n); if (n1 <= 0) @@ -4838,7 +4849,6 @@ PP(pp_alarm) PP(pp_sleep) { dSP; dTARGET; - I32 duration; Time_t lasttime; Time_t when; @@ -4846,7 +4856,7 @@ PP(pp_sleep) if (MAXARG < 1 || (!TOPs && !POPs)) PerlProc_pause(); else { - duration = POPi; + const I32 duration = POPi; if (duration < 0) { /* diag_listed_as: %s() with negative argument */ Perl_ck_warner_d(aTHX_ packWARN(WARN_MISC), diff --git a/proto.h b/proto.h index 2fd8a51..fea633f 100644 --- a/proto.h +++ b/proto.h @@ -41,6 +41,9 @@ PERL_CALLCONV void* Perl_Slab_Alloc(pTHX_ size_t sz) PERL_CALLCONV void Perl_Slab_Free(pTHX_ void *op); #define PERL_ARGS_ASSERT_SLAB_FREE \ assert(op) +PERL_CALLCONV char * Perl__byte_dump_string(pTHX_ const U8 * s, const STRLEN len, const bool format); +#define PERL_ARGS_ASSERT__BYTE_DUMP_STRING \ + assert(s) PERL_CALLCONV void Perl__force_out_malformed_utf8_message(pTHX_ const U8 *const p, const U8 * const e, const U32 flags, const bool die_here); #define PERL_ARGS_ASSERT__FORCE_OUT_MALFORMED_UTF8_MESSAGE \ assert(p); assert(e) @@ -115,6 +118,9 @@ PERL_CALLCONV UV Perl__to_utf8_upper_flags(pTHX_ const U8 *p, const U8 *e, U8* u #define PERL_ARGS_ASSERT__TO_UTF8_UPPER_FLAGS \ assert(p); assert(ustrp); assert(file) PERL_CALLCONV void Perl__warn_problematic_locale(void); +PERL_CALLCONV void Perl_abort_execution(pTHX_ const char * const msg, const char * const name); +#define PERL_ARGS_ASSERT_ABORT_EXECUTION \ + assert(msg); assert(name) PERL_CALLCONV LOGOP* Perl_alloc_LOGOP(pTHX_ I32 type, OP *first, OP *other); PERL_CALLCONV PADOFFSET Perl_allocmy(pTHX_ const char *const name, const STRLEN len, const U32 flags); #define PERL_ARGS_ASSERT_ALLOCMY \ @@ -880,24 +886,15 @@ PERL_CALLCONV PADOFFSET Perl_find_rundefsvoffset(pTHX) PERL_CALLCONV char* Perl_find_script(pTHX_ const char *scriptname, bool dosearch, const char *const *const search_ext, I32 flags); #define PERL_ARGS_ASSERT_FIND_SCRIPT \ assert(scriptname) -PERL_CALLCONV I32 Perl_foldEQ(const char* a, const char* b, I32 len) - __attribute__warn_unused_result__ - __attribute__pure__; +PERL_STATIC_INLINE I32 Perl_foldEQ(const char* a, const char* b, I32 len); #define PERL_ARGS_ASSERT_FOLDEQ \ assert(a); assert(b) - -PERL_CALLCONV I32 Perl_foldEQ_latin1(const char* a, const char* b, I32 len) - __attribute__warn_unused_result__ - __attribute__pure__; +PERL_STATIC_INLINE I32 Perl_foldEQ_latin1(const char* a, const char* b, I32 len); #define PERL_ARGS_ASSERT_FOLDEQ_LATIN1 \ assert(a); assert(b) - -PERL_CALLCONV I32 Perl_foldEQ_locale(const char* a, const char* b, I32 len) - __attribute__warn_unused_result__ - __attribute__pure__; +PERL_STATIC_INLINE I32 Perl_foldEQ_locale(const char* a, const char* b, I32 len); #define PERL_ARGS_ASSERT_FOLDEQ_LOCALE \ assert(a); assert(b) - /* PERL_CALLCONV I32 foldEQ_utf8(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const char *s2, char **pe2, UV l2, bool u2); */ PERL_CALLCONV I32 Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const char *s2, char **pe2, UV l2, bool u2, U32 flags); #define PERL_ARGS_ASSERT_FOLDEQ_UTF8_FLAGS \ @@ -2331,6 +2328,7 @@ PERL_CALLCONV_NO_RET void Perl_noperl_die(const char* pat, ...) assert(pat) PERL_CALLCONV int Perl_nothreadhook(pTHX); +PERL_CALLCONV void Perl_notify_parser_that_changed_to_utf8(pTHX); PERL_CALLCONV OP* Perl_oopsAV(pTHX_ OP* o) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_OOPSAV \ @@ -2343,6 +2341,7 @@ PERL_CALLCONV OP* Perl_oopsHV(pTHX_ OP* o) PERL_CALLCONV OP* Perl_op_append_elem(pTHX_ I32 optype, OP* first, OP* last); PERL_CALLCONV OP* Perl_op_append_list(pTHX_ I32 optype, OP* first, OP* last); +PERL_CALLCONV OPclass Perl_op_class(pTHX_ const OP *o); PERL_CALLCONV void Perl_op_clear(pTHX_ OP* o); #define PERL_ARGS_ASSERT_OP_CLEAR \ assert(o) @@ -3074,8 +3073,6 @@ PERL_CALLCONV bool Perl_sv_does_sv(pTHX_ SV* sv, SV* namesv, U32 flags) assert(sv); assert(namesv) PERL_CALLCONV void Perl_sv_dump(pTHX_ SV* sv); -#define PERL_ARGS_ASSERT_SV_DUMP \ - assert(sv) /* PERL_CALLCONV I32 sv_eq(pTHX_ SV* sv1, SV* sv2); */ PERL_CALLCONV I32 Perl_sv_eq_flags(pTHX_ SV* sv1, SV* sv2, const U32 flags); #ifndef NO_MATHOMS @@ -3107,7 +3104,7 @@ PERL_CALLCONV void Perl_sv_insert(pTHX_ SV *const bigstr, const STRLEN offset, c #define PERL_ARGS_ASSERT_SV_INSERT \ assert(bigstr); assert(little) #endif -PERL_CALLCONV void Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *const little, const STRLEN littlelen, const U32 flags); +PERL_CALLCONV void Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *little, const STRLEN littlelen, const U32 flags); #define PERL_ARGS_ASSERT_SV_INSERT_FLAGS \ assert(bigstr); assert(little) PERL_CALLCONV int Perl_sv_isa(pTHX_ SV* sv, const char *const name); @@ -3549,7 +3546,9 @@ PERL_CALLCONV UV Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) #define PERL_ARGS_ASSERT_UTF8_TO_UVCHR \ assert(s) -/* PERL_CALLCONV UV utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen); */ +PERL_CALLCONV UV Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen); +#define PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF \ + assert(s); assert(send) PERL_CALLCONV UV Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen) __attribute__deprecated__; #define PERL_ARGS_ASSERT_UTF8_TO_UVUNI \ @@ -3694,10 +3693,9 @@ PERL_CALLCONV int Perl_yyerror_pv(pTHX_ const char *const s, U32 flags); #define PERL_ARGS_ASSERT_YYERROR_PV \ assert(s) PERL_CALLCONV int Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags); -#define PERL_ARGS_ASSERT_YYERROR_PVN \ - assert(s) PERL_CALLCONV int Perl_yylex(pTHX); PERL_CALLCONV int Perl_yyparse(pTHX_ int gramtype); +PERL_CALLCONV void Perl_yyquit(pTHX); PERL_CALLCONV void Perl_yyunlex(pTHX); #if !(defined(DEBUGGING)) # if !defined(NV_PRESERVES_UV) @@ -5041,9 +5039,9 @@ STATIC regnode* S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 dept STATIC unsigned int S_regex_set_precedence(const U8 my_operator) __attribute__warn_unused_result__; -STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth); +STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth); #define PERL_ARGS_ASSERT_REGINSERT \ - assert(pRExC_state); assert(opnd) + assert(pRExC_state); assert(operand) STATIC regnode* S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len, const char* const name); #define PERL_ARGS_ASSERT_REGNODE_GUTS \ assert(pRExC_state); assert(name) @@ -5620,9 +5618,6 @@ STATIC bool S_isa_lookup(pTHX_ HV *stash, const char * const name, STRLEN len, U assert(stash); assert(name) #endif #if defined(PERL_IN_UTF8_C) -STATIC char * S__byte_dump_string(pTHX_ const U8 * s, const STRLEN len); -#define PERL_ARGS_ASSERT__BYTE_DUMP_STRING \ - assert(s) STATIC UV S__to_utf8_case(pTHX_ const UV uv1, const U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, const char *normal, const char *special); #define PERL_ARGS_ASSERT__TO_UTF8_CASE \ assert(p); assert(ustrp); assert(swashp); assert(normal) diff --git a/regcharclass.h b/regcharclass.h index 4be75bc..f66d147 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -1897,7 +1897,7 @@ * 37f6186253da9824bdb27f4ad867bfe8c25d4dc6bdb2f05585e40a034675a348 lib/unicore/extracted/DLineBreak.txt * ef24061b5a5dc93d7e90c2e34530ec757180ee75d872cba65ffc946e52624ae8 lib/unicore/extracted/DNumType.txt * a197371fec9a1b517058b440841f60f9378d81682084eef8db22a88cb2f96e90 lib/unicore/extracted/DNumValues.txt - * 4bcfb4545be21663ca38a2acbfcbf2b0f3252652a34b50f1a56ef76cb959861b lib/unicore/mktables + * 79a7216aceb1d291f2857085545fdda289518bc540a09bc0a15cde105d76028d lib/unicore/mktables * cdecb300baad839a6f62791229f551a4fa33f3cbdca08e378dc976466354e778 lib/unicore/version * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl * 1d27ae8b75d81a082b1fc594673e08540280f8169309a7b5047015c8091a2bfb regen/regcharclass.pl diff --git a/regcomp.c b/regcomp.c index 97888ca..ec7fa3b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -182,9 +182,8 @@ struct RExC_state_t { I32 recode_x_to_native; #endif I32 in_multi_char_class; - struct reg_code_block *code_blocks; /* positions of literal (?{}) + struct reg_code_blocks *code_blocks;/* positions of literal (?{}) within pattern */ - int num_code_blocks; /* size of code_blocks[] */ int code_index; /* next code_blocks[] slot */ SSize_t maxlen; /* mininum possible number of chars in string to match */ scan_frame *frame_head; @@ -819,13 +818,6 @@ static const scan_data_t zero_scan_data = REPORT_LOCATION_ARGS(loc)); \ } STMT_END -#define vWARN4dep(loc, m, a1, a2, a3) STMT_START { \ - __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN2(WARN_REGEXP,WARN_DEPRECATED), \ - m REPORT_LOCATION, \ - a1, a2, a3, \ - REPORT_LOCATION_ARGS(loc)); \ -} STMT_END - #define ckWARN4reg(loc, m, a1, a2, a3) STMT_START { \ __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), \ m REPORT_LOCATION, \ @@ -5219,15 +5211,21 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, However, this time it's not a subexpression we care about, but the expression itself. */ && (maxcount == REG_INFTY) - && data && ++data->whilem_c < 16) { + && data) { /* This stays as CURLYX, we can put the count/of pair. */ /* Find WHILEM (as in regexec.c) */ regnode *nxt = oscan + NEXT_OFF(oscan); if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */ nxt += ARG(nxt); - PREVOPER(nxt)->flags = (U8)(data->whilem_c - | (RExC_whilem_seen << 4)); /* On WHILEM */ + nxt = PREVOPER(nxt); + if (nxt->flags & 0xf) { + /* we've already set whilem count on this node */ + } else if (++data->whilem_c < 16) { + assert(data->whilem_c <= RExC_whilem_seen); + nxt->flags = (U8)(data->whilem_c + | (RExC_whilem_seen << 4)); /* On WHILEM */ + } } if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) pars++; @@ -6129,6 +6127,39 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags) } +static void +S_free_codeblocks(pTHX_ struct reg_code_blocks *cbs) +{ + int n; + + if (--cbs->refcnt > 0) + return; + for (n = 0; n < cbs->count; n++) { + REGEXP *rx = cbs->cb[n].src_regex; + cbs->cb[n].src_regex = NULL; + SvREFCNT_dec(rx); + } + Safefree(cbs->cb); + Safefree(cbs); +} + + +static struct reg_code_blocks * +S_alloc_code_blocks(pTHX_ int ncode) +{ + struct reg_code_blocks *cbs; + Newx(cbs, 1, struct reg_code_blocks); + cbs->count = ncode; + cbs->refcnt = 1; + SAVEDESTRUCTOR_X(S_free_codeblocks, cbs); + if (ncode) + Newx(cbs->cb, ncode, struct reg_code_block); + else + cbs->cb = NULL; + return cbs; +} + + /* upgrade pattern pat_p of length plen_p to UTF8, and if there are code * blocks, recalculate the indices. Update pat_p and plen_p in-place to * point to the realloced string and length. @@ -6155,14 +6186,16 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state, while (s < *plen_p) { append_utf8_from_native_byte(src[s], &d); + if (n < num_code_blocks) { - if (!do_end && pRExC_state->code_blocks[n].start == s) { - pRExC_state->code_blocks[n].start = d - dst - 1; + assert(pRExC_state->code_blocks); + if (!do_end && pRExC_state->code_blocks->cb[n].start == s) { + pRExC_state->code_blocks->cb[n].start = d - dst - 1; assert(*(d - 1) == '('); do_end = 1; } - else if (do_end && pRExC_state->code_blocks[n].end == s) { - pRExC_state->code_blocks[n].end = d - dst - 1; + else if (do_end && pRExC_state->code_blocks->cb[n].end == s) { + pRExC_state->code_blocks->cb[n].end = d - dst - 1; assert(*(d - 1) == ')'); do_end = 0; n++; @@ -6282,10 +6315,10 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, if (oplist->op_type == OP_NULL && (oplist->op_flags & OPf_SPECIAL)) { - assert(n < pRExC_state->num_code_blocks); - pRExC_state->code_blocks[n].start = pat ? SvCUR(pat) : 0; - pRExC_state->code_blocks[n].block = oplist; - pRExC_state->code_blocks[n].src_regex = NULL; + assert(n < pRExC_state->code_blocks->count); + pRExC_state->code_blocks->cb[n].start = pat ? SvCUR(pat) : 0; + pRExC_state->code_blocks->cb[n].block = oplist; + pRExC_state->code_blocks->cb[n].src_regex = NULL; n++; code = 1; oplist = OpSIBLING(oplist); /* skip CONST */ @@ -6315,7 +6348,8 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, sv_setsv(pat, sv); /* overloading involved: all bets are off over literal * code. Pretend we haven't seen it */ - pRExC_state->num_code_blocks -= n; + if (n) + pRExC_state->code_blocks->count -= n; n = 0; } else { @@ -6365,7 +6399,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, } if (code) - pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1; + pRExC_state->code_blocks->cb[n-1].end = SvCUR(pat)-1; } /* extract any code blocks within any embedded qr//'s */ @@ -6374,25 +6408,30 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, { RXi_GET_DECL(ReANY((REGEXP *)rx), ri); - if (ri->num_code_blocks) { + if (ri->code_blocks && ri->code_blocks->count) { int i; /* the presence of an embedded qr// with code means * we should always recompile: the text of the * qr// may not have changed, but it may be a * different closure than last time */ *recompile_p = 1; - Renew(pRExC_state->code_blocks, - pRExC_state->num_code_blocks + ri->num_code_blocks, - struct reg_code_block); - pRExC_state->num_code_blocks += ri->num_code_blocks; + if (pRExC_state->code_blocks) { + pRExC_state->code_blocks->count += ri->code_blocks->count; + Renew(pRExC_state->code_blocks->cb, + pRExC_state->code_blocks->count, + struct reg_code_block); + } + else + pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_ + ri->code_blocks->count); - for (i=0; i < ri->num_code_blocks; i++) { + for (i=0; i < ri->code_blocks->count; i++) { struct reg_code_block *src, *dst; STRLEN offset = orig_patlen + ReANY((REGEXP *)rx)->pre_prefix; - assert(n < pRExC_state->num_code_blocks); - src = &ri->code_blocks[i]; - dst = &pRExC_state->code_blocks[n]; + assert(n < pRExC_state->code_blocks->count); + src = &ri->code_blocks->cb[i]; + dst = &pRExC_state->code_blocks->cb[n]; dst->start = src->start + offset; dst->end = src->end + offset; dst->block = src->block; @@ -6427,10 +6466,11 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, PERL_UNUSED_CONTEXT; for (s = 0; s < plen; s++) { - if (n < pRExC_state->num_code_blocks - && s == pRExC_state->code_blocks[n].start) + if ( pRExC_state->code_blocks + && n < pRExC_state->code_blocks->count + && s == pRExC_state->code_blocks->cb[n].start) { - s = pRExC_state->code_blocks[n].end; + s = pRExC_state->code_blocks->cb[n].end; n++; continue; } @@ -6490,7 +6530,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, int n = 0; STRLEN s; char *p, *newpat; - int newlen = plen + 6; /* allow for "qr''x\0" extra chars */ + int newlen = plen + 7; /* allow for "qr''xx\0" extra chars */ SV *sv, *qr_ref; dSP; @@ -6505,12 +6545,13 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, *p++ = 'q'; *p++ = 'r'; *p++ = '\''; for (s = 0; s < plen; s++) { - if (n < pRExC_state->num_code_blocks - && s == pRExC_state->code_blocks[n].start) + if ( pRExC_state->code_blocks + && n < pRExC_state->code_blocks->count + && s == pRExC_state->code_blocks->cb[n].start) { /* blank out literal code block */ assert(pat[s] == '('); - while (s <= pRExC_state->code_blocks[n].end) { + while (s <= pRExC_state->code_blocks->cb[n].end) { *p++ = '_'; s++; } @@ -6554,11 +6595,8 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, { SV * const errsv = ERRSV; if (SvTRUE_NN(errsv)) - { - Safefree(pRExC_state->code_blocks); /* use croak_sv ? */ Perl_croak_nocontext("%" SVf, SVfARG(errsv)); - } } assert(SvROK(qr_ref)); qr = SvRV(qr_ref); @@ -6590,42 +6628,46 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, struct reg_code_block *new_block, *dst; RExC_state_t * const r1 = pRExC_state; /* convenient alias */ int i1 = 0, i2 = 0; + int r1c, r2c; - if (!r2->num_code_blocks) /* we guessed wrong */ + if (!r2->code_blocks || !r2->code_blocks->count) /* we guessed wrong */ { SvREFCNT_dec_NN(qr); return 1; } - Newx(new_block, - r1->num_code_blocks + r2->num_code_blocks, - struct reg_code_block); + if (!r1->code_blocks) + r1->code_blocks = S_alloc_code_blocks(aTHX_ 0); + + r1c = r1->code_blocks->count; + r2c = r2->code_blocks->count; + + Newx(new_block, r1c + r2c, struct reg_code_block); + dst = new_block; - while ( i1 < r1->num_code_blocks - || i2 < r2->num_code_blocks) - { + while (i1 < r1c || i2 < r2c) { struct reg_code_block *src; bool is_qr = 0; - if (i1 == r1->num_code_blocks) { - src = &r2->code_blocks[i2++]; + if (i1 == r1c) { + src = &r2->code_blocks->cb[i2++]; is_qr = 1; } - else if (i2 == r2->num_code_blocks) - src = &r1->code_blocks[i1++]; - else if ( r1->code_blocks[i1].start - < r2->code_blocks[i2].start) + else if (i2 == r2c) + src = &r1->code_blocks->cb[i1++]; + else if ( r1->code_blocks->cb[i1].start + < r2->code_blocks->cb[i2].start) { - src = &r1->code_blocks[i1++]; - assert(src->end < r2->code_blocks[i2].start); + src = &r1->code_blocks->cb[i1++]; + assert(src->end < r2->code_blocks->cb[i2].start); } else { - assert( r1->code_blocks[i1].start - > r2->code_blocks[i2].start); - src = &r2->code_blocks[i2++]; + assert( r1->code_blocks->cb[i1].start + > r2->code_blocks->cb[i2].start); + src = &r2->code_blocks->cb[i2++]; is_qr = 1; - assert(src->end < r1->code_blocks[i1].start); + assert(src->end < r1->code_blocks->cb[i1].start); } assert(pat[src->start] == '('); @@ -6637,9 +6679,9 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, : src->src_regex; dst++; } - r1->num_code_blocks += r2->num_code_blocks; - Safefree(r1->code_blocks); - r1->code_blocks = new_block; + r1->code_blocks->count += r2c; + Safefree(r1->code_blocks->cb); + r1->code_blocks->cb = new_block; } SvREFCNT_dec_NN(qr); @@ -6758,7 +6800,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, SSize_t minlen = 0; U32 rx_flags; SV *pat; - SV *code_blocksv = NULL; SV** new_patternp = patternp; /* these are all flags - maybe they should be turned @@ -6816,7 +6857,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, pRExC_state->warn_text = NULL; pRExC_state->code_blocks = NULL; - pRExC_state->num_code_blocks = 0; if (is_bare_re) *is_bare_re = FALSE; @@ -6830,10 +6870,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) ncode++; /* count of DO blocks */ - if (ncode) { - pRExC_state->num_code_blocks = ncode; - Newx(pRExC_state->code_blocks, ncode, struct reg_code_block); - } + + if (ncode) + pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_ ncode); } if (!pat_count) { @@ -6877,7 +6916,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, /* set expr to the first arg op */ - if (pRExC_state->num_code_blocks + if (pRExC_state->code_blocks && pRExC_state->code_blocks->count && expr->op_type != OP_CONST) { expr = cLISTOPx(expr)->op_first; @@ -6899,7 +6938,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, if (is_bare_re) *is_bare_re = TRUE; SvREFCNT_inc(re); - Safefree(pRExC_state->code_blocks); DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Precompiled pattern%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : "")); @@ -6919,7 +6957,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, pat = newSVpvn_flags(exp, plen, SVs_TEMP | (IN_BYTES ? 0 : SvUTF8(pat))); } - Safefree(pRExC_state->code_blocks); return CALLREGCOMP_ENG(eng, pat, orig_rx_flags); } @@ -6974,7 +7011,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, && memEQ(RX_PRECOMP(old_re), exp, plen) && !runtime_code /* with runtime code, always recompile */ ) { - Safefree(pRExC_state->code_blocks); return old_re; } @@ -7003,7 +7039,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, /* whoops, we have a non-utf8 pattern, whilst run-time code * got compiled as utf8. Try again with a utf8 pattern */ S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen, - pRExC_state->num_code_blocks); + pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0); goto redo_first_pass; } } @@ -7059,17 +7095,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_lastnum=0; RExC_lastparse=NULL; ); - /* reg may croak on us, not giving us a chance to free - pRExC_state->code_blocks. We cannot SAVEFREEPV it now, as we may - need it to survive as long as the regexp (qr/(?{})/). - We must check that code_blocksv is not already set, because we may - have jumped back to restart the sizing pass. */ - if (pRExC_state->code_blocks && !code_blocksv) { - code_blocksv = newSV_type(SVt_PV); - SAVEFREESV(code_blocksv); - SvPV_set(code_blocksv, (char *)pRExC_state->code_blocks); - SvLEN_set(code_blocksv, 1); /*sufficient to make sv_clear free it*/ - } + if (reg(pRExC_state, 0, &flags,1) == NULL) { /* It's possible to write a regexp in ascii that represents Unicode codepoints outside of the byte range, such as via \x{100}. If we @@ -7082,7 +7108,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, if (flags & RESTART_PASS1) { if (flags & NEED_UTF8) { S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen, - pRExC_state->num_code_blocks); + pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0); } else { DEBUG_PARSE_r(Perl_re_printf( aTHX_ @@ -7093,8 +7119,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, } Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#" UVxf, (UV) flags); } - if (code_blocksv) - SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */ DEBUG_PARSE_r({ Perl_re_printf( aTHX_ @@ -7147,16 +7171,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, if (pm_flags & PMf_IS_QR) { ri->code_blocks = pRExC_state->code_blocks; - ri->num_code_blocks = pRExC_state->num_code_blocks; - } - else - { - int n; - for (n = 0; n < pRExC_state->num_code_blocks; n++) - if (pRExC_state->code_blocks[n].src_regex) - SAVEFREESV(pRExC_state->code_blocks[n].src_regex); - if(pRExC_state->code_blocks) - SAVEFREEPV(pRExC_state->code_blocks); /* often null */ + if (ri->code_blocks) + ri->code_blocks->refcnt++; } { @@ -7435,7 +7451,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, !sawlookahead && (OP(first) == STAR && PL_regkind[OP(NEXTOPER(first))] == REG_ANY) && - !(r->intflags & PREGf_ANCH) && !pRExC_state->num_code_blocks) + !(r->intflags & PREGf_ANCH) && !pRExC_state->code_blocks) { /* turn .* into ^.* with an implied $*=1 */ const int type = @@ -7448,7 +7464,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, } if (sawplus && !sawminmod && !sawlookahead && (!sawopen || !RExC_sawback) - && !pRExC_state->num_code_blocks) /* May examine pos and $& */ + && !pRExC_state->code_blocks) /* May examine pos and $& */ /* x+ must match at the 1st pos of run of x's */ r->intflags |= PREGf_SKIP; @@ -7704,7 +7720,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, if (RExC_seen & REG_LOOKBEHIND_SEEN) r->extflags |= RXf_NO_INPLACE_SUBST; /* inplace might break the lookbehind */ - if (pRExC_state->num_code_blocks) + if (pRExC_state->code_blocks) r->extflags |= RXf_EVAL_SEEN; if (RExC_seen & REG_VERBARG_SEEN) { @@ -7776,6 +7792,18 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, while ( RExC_recurse_count > 0 ) { const regnode *scan = RExC_recurse[ --RExC_recurse_count ]; + /* + * This data structure is set up in study_chunk() and is used + * to calculate the distance between a GOSUB regopcode and + * the OPEN/CURLYM (CURLYM's are special and can act like OPEN's) + * it refers to. + * + * If for some reason someone writes code that optimises + * away a GOSUB opcode then the assert should be changed to + * an if(scan) to guard the ARG2L_SET() - Yves + * + */ + assert(scan && OP(scan) == GOSUB); ARG2L_SET( scan, RExC_open_parens[ARG(scan)] - scan ); } @@ -10228,7 +10256,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node) { AV* list = (AV*) *listp; IV k; - for (k = 0; k <= av_tindex_nomg(list); k++) { + for (k = 0; k <= av_tindex_skip_len_mg(list); k++) { SV** c_p = av_fetch(list, k, FALSE); UV c; assert(c_p); @@ -11004,9 +11032,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_seen_zerolen++; - if ( !pRExC_state->num_code_blocks - || pRExC_state->code_index >= pRExC_state->num_code_blocks - || pRExC_state->code_blocks[pRExC_state->code_index].start + if ( !pRExC_state->code_blocks + || pRExC_state->code_index + >= pRExC_state->code_blocks->count + || pRExC_state->code_blocks->cb[pRExC_state->code_index].start != (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0)) - RExC_start) ) { @@ -11015,7 +11044,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) FAIL("Eval-group not allowed at runtime, use re 'eval'"); } /* this is a pre-compiled code block (?{...}) */ - cb = &pRExC_state->code_blocks[pRExC_state->code_index]; + cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index]; RExC_parse = RExC_start + cb->end; if (!SIZE_ONLY) { OP *o = cb->block; @@ -11695,19 +11724,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) nextchar(pRExC_state); if (max < min) { /* If can't match, warn and optimize to fail unconditionally */ - if (SIZE_ONLY) { - - /* We can't back off the size because we have to reserve - * enough space for all the things we are about to throw - * away, but we can shrink it by the amount we are about - * to re-use here */ - RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL]; - } - else { + reginsert(pRExC_state, OPFAIL, orig_emit, depth+1); + if (PASS2) { ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match"); - RExC_emit = orig_emit; + NEXT_OFF(orig_emit)= regarglen[OPFAIL] + NODE_STEP_REGNODE; } - ret = reganode(pRExC_state, OPFAIL, 0); return ret; } else if (min == max && *RExC_parse == '?') @@ -14852,7 +14873,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, no_close: /* We output the messages even if warnings are off, because we'll fail * the very next thing, and these give a likely diagnosis for that */ - if (posix_warnings && av_tindex_nomg(posix_warnings) >= 0) { + if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) { output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL); } @@ -14967,7 +14988,7 @@ redo_curchar: stack, fence, fence_stack)); #endif - top_index = av_tindex_nomg(stack); + top_index = av_tindex_skip_len_mg(stack); switch (curchar) { SV** stacked_ptr; /* Ptr to something already on 'stack' */ @@ -15145,7 +15166,7 @@ redo_curchar: goto done; case ')': - if (av_tindex_nomg(fence_stack) < 0) { + if (av_tindex_skip_len_mg(fence_stack) < 0) { RExC_parse++; vFAIL("Unexpected ')'"); } @@ -15341,7 +15362,7 @@ redo_curchar: * may have altered the stack in the time since we earlier set * 'top_index'. */ - top_index = av_tindex_nomg(stack); + top_index = av_tindex_skip_len_mg(stack); if (top_index - fence >= 0) { /* If the top entry on the stack is an operator, it had better * be a '!', otherwise the entry below the top operand should @@ -15392,15 +15413,15 @@ redo_curchar: } /* End of loop parsing through the construct */ done: - if (av_tindex_nomg(fence_stack) >= 0) { + if (av_tindex_skip_len_mg(fence_stack) >= 0) { vFAIL("Unmatched ("); } - if (av_tindex_nomg(stack) < 0 /* Was empty */ + if (av_tindex_skip_len_mg(stack) < 0 /* Was empty */ || ((final = av_pop(stack)) == NULL) || ! IS_OPERAND(final) || SvTYPE(final) != SVt_INVLIST - || av_tindex_nomg(stack) >= 0) /* More left on stack */ + || av_tindex_skip_len_mg(stack) >= 0) /* More left on stack */ { bad_syntax: SvREFCNT_dec(final); @@ -15503,8 +15524,8 @@ S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state, AV * stack, const IV fence, AV * fence_stack) { /* Dumps the stacks in handle_regex_sets() */ - const SSize_t stack_top = av_tindex_nomg(stack); - const SSize_t fence_stack_top = av_tindex_nomg(fence_stack); + const SSize_t stack_top = av_tindex_skip_len_mg(stack); + const SSize_t fence_stack_top = av_tindex_skip_len_mg(fence_stack); SSize_t i; PERL_ARGS_ASSERT_DUMP_REGEX_SETS_STRUCTURES; @@ -15958,7 +15979,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, while (1) { if ( posix_warnings - && av_tindex_nomg(posix_warnings) >= 0 + && av_tindex_skip_len_mg(posix_warnings) >= 0 && RExC_parse > not_posix_region_end) { /* Warnings about posix class issues are considered tentative until @@ -16014,7 +16035,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * posix class, and it failed, it was a false alarm, as this * successful one proves */ if ( posix_warnings - && av_tindex_nomg(posix_warnings) >= 0 + && av_tindex_skip_len_mg(posix_warnings) >= 0 && not_posix_region_end >= RExC_parse && not_posix_region_end <= posix_class_end) { @@ -16905,22 +16926,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, literal[d++] = (char) value; literal[d++] = '\0'; - vWARN4dep(RExC_parse, - "\"%.*s\" is more clearly written simply as \"%s\". " - "This will be a fatal error in Perl 5.28", + vWARN4(RExC_parse, + "\"%.*s\" is more clearly written simply as \"%s\"", (int) (RExC_parse - rangebegin), rangebegin, literal - ); + ); } else if isMNEMONIC_CNTRL(value) { - vWARN4dep(RExC_parse, - "\"%.*s\" is more clearly written simply as \"%s\". " - "This will be a fatal error in Perl 5.28", + vWARN4(RExC_parse, + "\"%.*s\" is more clearly written simply as \"%s\"", (int) (RExC_parse - rangebegin), rangebegin, cntrl_to_mnemonic((U8) value) - ); + ); } } } @@ -16973,7 +16992,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, } /* End of loop through all the text within the brackets */ - if ( posix_warnings && av_tindex_nomg(posix_warnings) >= 0) { + if ( posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) { output_or_return_posix_warnings(pRExC_state, posix_warnings, return_posix_warnings); } @@ -17006,7 +17025,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, #endif /* Look at the longest folds first */ - for (cp_count = av_tindex_nomg(multi_char_matches); + for (cp_count = av_tindex_skip_len_mg(multi_char_matches); cp_count > 0; cp_count--) { @@ -17388,7 +17407,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, { AV* list = (AV*) *listp; IV k; - for (k = 0; k <= av_tindex_nomg(list); k++) { + for (k = 0; k <= av_tindex_skip_len_mg(list); k++) { SV** c_p = av_fetch(list, k, FALSE); UV c; assert(c_p); @@ -18079,7 +18098,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, si = *ary; /* ary[0] = the string to initialize the swash with */ - if (av_tindex_nomg(av) >= 2) { + if (av_tindex_skip_len_mg(av) >= 2) { if (only_utf8_locale_ptr && ary[2] && ary[2] != &PL_sv_undef) @@ -18095,7 +18114,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, * is any inversion list generated at compile time; [4] * indicates if that inversion list has any user-defined * properties in it. */ - if (av_tindex_nomg(av) >= 3) { + if (av_tindex_skip_len_mg(av) >= 3) { invlist = ary[3]; if (SvUV(ary[4])) { swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY; @@ -18485,9 +18504,17 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. +* +* IMPORTANT NOTE - it is the *callers* responsibility to correctly +* set up NEXT_OFF() of the inserted node if needed. Something like this: +* +* reginsert(pRExC, OPFAIL, orig_emit, depth+1); +* if (PASS2) +* NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE; +* */ STATIC void -S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) +S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth) { regnode *src; regnode *dst; @@ -18521,13 +18548,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) /* note, RExC_open_parens[0] is the start of the * regex, it can't move. RExC_close_parens[0] is the end * of the regex, it *can* move. */ - if ( paren && RExC_open_parens[paren] >= opnd ) { + if ( paren && RExC_open_parens[paren] >= operand ) { /*DEBUG_PARSE_FMT("open"," - %d",size);*/ RExC_open_parens[paren] += size; } else { /*DEBUG_PARSE_FMT("open"," - %s","ok");*/ } - if ( RExC_close_parens[paren] >= opnd ) { + if ( RExC_close_parens[paren] >= operand ) { /*DEBUG_PARSE_FMT("close"," - %d",size);*/ RExC_close_parens[paren] += size; } else { @@ -18538,7 +18565,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) if (RExC_end_op) RExC_end_op += size; - while (src > opnd) { + while (src > operand) { StructCopy(--src, --dst, regnode); #ifdef RE_TRACK_PATTERN_OFFSETS if (RExC_offsets) { /* MJD 20010112 */ @@ -18559,7 +18586,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) } - place = opnd; /* Op node, where operand used to be. */ + place = operand; /* Op node, where operand used to be. */ #ifdef RE_TRACK_PATTERN_OFFSETS if (RExC_offsets) { /* MJD */ MJD_OFFSET_DEBUG( @@ -19507,12 +19534,8 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) if (ri->u.offsets) Safefree(ri->u.offsets); /* 20010421 MJD */ #endif - if (ri->code_blocks) { - int n; - for (n = 0; n < ri->num_code_blocks; n++) - SvREFCNT_dec(ri->code_blocks[n].src_regex); - Safefree(ri->code_blocks); - } + if (ri->code_blocks) + S_free_codeblocks(aTHX_ ri->code_blocks); if (ri->data) { int n = ri->data->count; @@ -19729,16 +19752,18 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) Copy(ri->program, reti->program, len+1, regnode); - reti->num_code_blocks = ri->num_code_blocks; if (ri->code_blocks) { int n; - Newxc(reti->code_blocks, ri->num_code_blocks, struct reg_code_block, - struct reg_code_block); - Copy(ri->code_blocks, reti->code_blocks, ri->num_code_blocks, - struct reg_code_block); - for (n = 0; n < ri->num_code_blocks; n++) - reti->code_blocks[n].src_regex = (REGEXP*) - sv_dup_inc((SV*)(ri->code_blocks[n].src_regex), param); + Newx(reti->code_blocks, 1, struct reg_code_blocks); + Newx(reti->code_blocks->cb, ri->code_blocks->count, + struct reg_code_block); + Copy(ri->code_blocks->cb, reti->code_blocks->cb, + ri->code_blocks->count, struct reg_code_block); + for (n = 0; n < ri->code_blocks->count; n++) + reti->code_blocks->cb[n].src_regex = (REGEXP*) + sv_dup_inc((SV*)(ri->code_blocks->cb[n].src_regex), param); + reti->code_blocks->count = ri->code_blocks->count; + reti->code_blocks->refcnt = 1; } else reti->code_blocks = NULL; diff --git a/regcomp.h b/regcomp.h index ec0c9f8..14599fa 100644 --- a/regcomp.h +++ b/regcomp.h @@ -107,8 +107,7 @@ Used to make it easier to clone and free arbitrary data that the regops need. Often the ARG field of a regop is an index into this structure */ - struct reg_code_block *code_blocks;/* positions of literal (?{}) */ - int num_code_blocks; /* size of code_blocks[] */ + struct reg_code_blocks *code_blocks;/* positions of literal (?{}) */ regnode program[1]; /* Unwarranted chumminess with compiler. */ } regexp_internal; diff --git a/regcomp.sym b/regcomp.sym index ac67955..999d965 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -243,7 +243,7 @@ PSEUDO PSEUDO, off ; Pseudo opcode for internal use. # # TRIE next:FAIL -EVAL AB:FAIL +EVAL B,postponed_AB:FAIL CURLYX end:FAIL WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL BRANCH next:FAIL diff --git a/regen/feature.pl b/regen/feature.pl index 66fc017..579120e 100755 --- a/regen/feature.pl +++ b/regen/feature.pl @@ -367,7 +367,7 @@ read_only_bottom_close_and_rename($h); __END__ package feature; -our $VERSION = '1.46'; +our $VERSION = '1.47'; FEATURES @@ -490,50 +490,22 @@ operator|perlop/Range Operators>. =head2 The 'unicode_eval' and 'evalbytes' features -Under the C feature, Perl's C function, when passed a -string, will evaluate it as a string of characters, ignoring any -C declarations. C exists to declare the encoding of -the script, which only makes sense for a stream of bytes, not a string of -characters. Source filters are forbidden, as they also really only make -sense on strings of bytes. Any attempt to activate a source filter will -result in an error. - -The C feature enables the C keyword, which evaluates -the argument passed to it as a string of bytes. It dies if the string -contains any characters outside the 8-bit range. Source filters work -within C: they apply to the contents of the string being -evaluated. - -Together, these two features are intended to replace the historical C -function, which has (at least) two bugs in it, that cannot easily be fixed -without breaking existing programs: - -=over - -=item * - -C behaves differently depending on the internal encoding of the -string, sometimes treating its argument as a string of bytes, and sometimes -as a string of characters. - -=item * - -Source filters activated within C leak out into whichever I -scope is currently being compiled. To give an example with the CPAN module -L: - - BEGIN { eval "use Semi::Semicolons; # not filtered here " } - # filtered here! - -C fixes that to work the way one would expect: - - use feature "evalbytes"; - BEGIN { evalbytes "use Semi::Semicolons; # filtered " } - # not filtered - -=back - -These two features are available starting with Perl 5.16. +Together, these two features are intended to replace the legacy string +C function, which behaves problematically in some instances. They are +available starting with Perl 5.16, and are enabled by default by a +S> or higher declaration. + +C changes the behavior of plain string C to work more +consistently, especially in the Unicode world. Certain (mis)behaviors +couldn't be changed without breaking some things that had come to rely on +them, so the feature can be enabled and disabled. Details are at +L. + +C is like string C, but operating on a byte stream that is +not UTF-8 encoded. Details are at L. Without a +S> nor a S> (or higher) declaration in +the current scope, you can still access it by instead writing +C. =head2 The 'current_sub' feature diff --git a/regen/warnings.pl b/regen/warnings.pl index 83bf8bc..5721c17 100644 --- a/regen/warnings.pl +++ b/regen/warnings.pl @@ -462,7 +462,8 @@ is by default enabled even if not within the scope of S>. #define unpackWARN4(x) (((x) >>24) & 0xFF) #define ckDEAD(x) \ - ( ! specialWARN(PL_curcop->cop_warnings) && \ + (PL_curcop && \ + !specialWARN(PL_curcop->cop_warnings) && \ ( isWARNf_on(PL_curcop->cop_warnings, WARN_ALL) || \ isWARNf_on(PL_curcop->cop_warnings, unpackWARN1(x)) || \ isWARNf_on(PL_curcop->cop_warnings, unpackWARN2(x)) || \ diff --git a/regexec.c b/regexec.c index 811eca2..82128a7 100644 --- a/regexec.c +++ b/regexec.c @@ -4181,7 +4181,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, } else { /* Does participate in folds */ AV* list = (AV*) *listp; - if (av_tindex_nomg(list) != 1) { + if (av_tindex_skip_len_mg(list) != 1) { /* If there aren't exactly two folds to this, it is * outside the scope of this function */ @@ -5391,12 +5391,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) U8 gimme = G_SCALAR; CV *caller_cv = NULL; /* who called us */ CV *last_pushed_cv = NULL; /* most recently called (?{}) CV */ - CHECKPOINT runops_cp; /* savestack position before executing EVAL */ U32 maxopenparen = 0; /* max '(' index seen so far */ int to_complement; /* Invert the result? */ _char_class_number classnum; bool is_utf8_pat = reginfo->is_utf8_pat; bool match = FALSE; + I32 orig_savestack_ix = PL_savestack_ix; /* Solaris Studio 12.3 messes up fetching PL_charclass['\n'] */ #if (defined(__SUNPRO_C) && (__SUNPRO_C == 0x5120) && defined(__x86_64) && defined(USE_64_BIT_ALL)) @@ -5736,6 +5736,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) { U8 *uc; if ( ST.jump ) { + /* undo any captures done in the tail part of a branch, + * e.g. + * /(?:X(.)(.)|Y(.)).../ + * where the trie just matches X then calls out to do the + * rest of the branch */ REGCP_UNWIND(ST.cp); UNWIND_PAREN(ST.lastparen, ST.lastcloseparen); } @@ -6786,7 +6791,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) goto eval_recurse_doit; /* NOTREACHED */ - case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */ + case EVAL: /* /(?{...})B/ /(??{A})B/ and /(?(?{...})X|Y)B/ */ if (cur_eval && cur_eval->locinput==locinput) { if ( ++nochange_depth > max_nochange_depth ) Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex"); @@ -6805,7 +6810,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) /* save *all* paren positions */ regcppush(rex, 0, maxopenparen); - REGCP_SET(runops_cp); + REGCP_SET(ST.lastcp); if (!caller_cv) caller_cv = find_runcv(NULL); @@ -6830,30 +6835,67 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) nop = (OP*)rexi->data->data[n]; } - /* normally if we're about to execute code from the same - * CV that we used previously, we just use the existing - * CX stack entry. However, its possible that in the - * meantime we may have backtracked, popped from the save - * stack, and undone the SAVECOMPPAD(s) associated with - * PUSH_MULTICALL; in which case PL_comppad no longer - * points to newcv's pad. */ + /* Some notes about MULTICALL and the context and save stacks. + * + * In something like + * /...(?{ my $x)}...(?{ my $y)}...(?{ my $z)}.../ + * since codeblocks don't introduce a new scope (so that + * local() etc accumulate), at the end of a successful + * match there will be a SAVEt_CLEARSV on the savestack + * for each of $x, $y, $z. If the three code blocks above + * happen to have come from different CVs (e.g. via + * embedded qr//s), then we must ensure that during any + * savestack unwinding, PL_comppad always points to the + * right pad at each moment. We achieve this by + * interleaving SAVEt_COMPPAD's on the savestack whenever + * there is a change of pad. + * In theory whenever we call a code block, we should + * push a CXt_SUB context, then pop it on return from + * that code block. This causes a bit of an issue in that + * normally popping a context also clears the savestack + * back to cx->blk_oldsaveix, but here we specifically + * don't want to clear the save stack on exit from the + * code block. + * Also for efficiency we don't want to keep pushing and + * popping the single SUB context as we backtrack etc. + * So instead, we push a single context the first time + * we need, it, then hang onto it until the end of this + * function. Whenever we encounter a new code block, we + * update the CV etc if that's changed. During the times + * in this function where we're not executing a code + * block, having the SUB context still there is a bit + * naughty - but we hope that no-one notices. + * When the SUB context is initially pushed, we fake up + * cx->blk_oldsaveix to be as if we'd pushed this context + * on first entry to S_regmatch rather than at some random + * point during the regexe execution. That way if we + * croak, popping the context stack will ensure that + * *everything* SAVEd by this function is undone and then + * the context popped, rather than e.g., popping the + * context (and restoring the original PL_comppad) then + * popping more of the savestack and restoring a bad + * PL_comppad. + */ + + /* If this is the first EVAL, push a MULTICALL. On + * subsequent calls, if we're executing a different CV, or + * if PL_comppad has got messed up from backtracking + * through SAVECOMPPADs, then refresh the context. + */ if (newcv != last_pushed_cv || PL_comppad != last_pad) { U8 flags = (CXp_SUB_RE | ((newcv == caller_cv) ? CXp_SUB_RE_FAKE : 0)); + SAVECOMPPAD(); if (last_pushed_cv) { - /* PUSH/POP_MULTICALL save and restore the - * caller's PL_comppad; if we call multiple subs - * using the same CX block, we have to save and - * unwind the varying PL_comppad's ourselves, - * especially restoring the right PL_comppad on - * backtrack - so save it on the save stack */ - SAVECOMPPAD(); CHANGE_MULTICALL_FLAGS(newcv, flags); } else { PUSH_MULTICALL_FLAGS(newcv, flags); } + /* see notes above */ + CX_CUR()->blk_oldsaveix = orig_savestack_ix; + last_pushed_cv = newcv; } else { @@ -6970,12 +7012,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) * in the regexp code uses the pad ! */ PL_op = oop; PL_curcop = ocurcop; - regcp_restore(rex, runops_cp, &maxopenparen); + regcp_restore(rex, ST.lastcp, &maxopenparen); PL_curpm_under = PL_curpm; PL_curpm = PL_reg_curpm; - if (logical != 2) - break; + if (logical != 2) { + PUSH_STATE_GOTO(EVAL_B, next, locinput); + /* NOTREACHED */ + } } /* only /(??{})/ from now on */ @@ -7073,11 +7117,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) ST.prev_eval = cur_eval; cur_eval = st; /* now continue from first node in postoned RE */ - PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput); + PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput); NOT_REACHED; /* NOTREACHED */ } - case EVAL_AB: /* cleanup after a successful (??{A})B */ + case EVAL_postponed_AB: /* cleanup after a successful (??{A})B */ /* note: this is called twice; first after popping B, then A */ DEBUG_STACK_r({ Perl_re_exec_indentf( aTHX_ "EVAL_AB cur_eval=%p prev_eval=%p\n", @@ -7123,7 +7167,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) sayYES; - case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */ + case EVAL_B_fail: /* unsuccessful B in (?{...})B */ + REGCP_UNWIND(ST.lastcp); + sayNO; + + case EVAL_postponed_AB_fail: /* unsuccessfully ran A or B in (??{A})B */ /* note: this is called twice; first after popping B, then A */ DEBUG_STACK_r({ Perl_re_exec_indentf( aTHX_ "EVAL_AB_fail cur_eval=%p prev_eval=%p\n", @@ -7523,9 +7571,6 @@ NULL if (cur_curlyx->u.curlyx.minmod) { ST.save_curlyx = cur_curlyx; cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx; - ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor, - maxopenparen); - REGCP_SET(ST.lastcp); PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B, locinput); NOT_REACHED; /* NOTREACHED */ @@ -7558,11 +7603,11 @@ NULL CACHEsayNO; NOT_REACHED; /* NOTREACHED */ - case WHILEM_A_min_fail: /* just failed to match A in a minimal match */ - /* FALLTHROUGH */ case WHILEM_A_pre_fail: /* just failed to match even minimal A */ REGCP_UNWIND(ST.lastcp); regcppop(rex, &maxopenparen); + /* FALLTHROUGH */ + case WHILEM_A_min_fail: /* just failed to match A in a minimal match */ cur_curlyx->u.curlyx.lastloc = ST.save_lastloc; cur_curlyx->u.curlyx.count--; CACHEsayNO; @@ -7595,8 +7640,6 @@ NULL case WHILEM_B_min_fail: /* just failed to match B in a minimal match */ cur_curlyx = ST.save_curlyx; - REGCP_UNWIND(ST.lastcp); - regcppop(rex, &maxopenparen); if (cur_curlyx->u.curlyx.count >= /*max*/ARG2(cur_curlyx->u.curlyx.me)) { /* Maximum greed exceeded */ @@ -7618,9 +7661,6 @@ NULL ); /* Try grabbing another A and see if it helps. */ cur_curlyx->u.curlyx.lastloc = locinput; - ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor, - maxopenparen); - REGCP_SET(ST.lastcp); PUSH_STATE_GOTO(WHILEM_A_min, /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS, locinput); @@ -8225,7 +8265,7 @@ NULL SET_RECURSE_LOCINPUT("FAKE-END[after]", cur_eval->locinput); - PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B, + PUSH_YES_STATE_GOTO(EVAL_postponed_AB, st->u.eval.prev_eval->u.eval.B, locinput); /* match B */ } @@ -8453,7 +8493,8 @@ NULL assert(!NEXTCHR_IS_EOS); if (utf8_target) { locinput += PL_utf8skip[nextchr]; - /* locinput is allowed to go 1 char off the end, but not 2+ */ + /* locinput is allowed to go 1 char off the end (signifying + * EOS), but not 2+ */ if (locinput > reginfo->strend) sayNO; } @@ -8481,16 +8522,17 @@ NULL DEBUG_STACK_r({ regmatch_state *cur = st; regmatch_state *curyes = yes_state; - int curd = depth; + U32 i; regmatch_slab *slab = PL_regmatch_slab; - for (;curd > -1 && (depth-curd < 3);cur--,curd--) { + for (i = 0; i < 3 && i <= depth; cur--,i++) { if (cur < SLAB_FIRST(slab)) { slab = slab->prev; cur = SLAB_LAST(slab); } - Perl_re_exec_indentf( aTHX_ "#%-3d %-10s %s\n", + Perl_re_exec_indentf( aTHX_ "%4s #%-3d %-10s %s\n", depth, - curd, PL_reg_name[cur->resume_state], + i ? " " : "push", + depth - i, PL_reg_name[cur->resume_state], (curyes == cur) ? "yes" : "" ); if (curyes == cur) @@ -8642,9 +8684,12 @@ NULL if (last_pushed_cv) { dSP; + /* see "Some notes about MULTICALL" above */ POP_MULTICALL; PERL_UNUSED_VAR(SP); } + else + LEAVE_SCOPE(orig_savestack_ix); assert(!result || locinput - reginfo->strbeg >= 0); return result ? locinput - reginfo->strbeg : -1; diff --git a/regexp.h b/regexp.h index 08b4fc3..9a2b61a 100644 --- a/regexp.h +++ b/regexp.h @@ -85,6 +85,14 @@ struct reg_code_block { REGEXP *src_regex; }; +/* array of reg_code_block's plus header info */ + +struct reg_code_blocks { + int refcnt; /* we may be pointed to from a regex and from the savestack */ + int count; /* how many code blocks */ + struct reg_code_block *cb; /* array of reg_code_block's */ +}; + /* The regexp/REGEXP struct, see L for further documentation @@ -262,7 +270,7 @@ and check for NULL. */ #define SvRX(sv) (Perl_get_re_arg(aTHX_ sv)) -#define SvRXOK(sv) (Perl_get_re_arg(aTHX_ sv) ? TRUE : FALSE) +#define SvRXOK(sv) cBOOL(Perl_get_re_arg(aTHX_ sv)) /* Flags stored in regexp->extflags diff --git a/regnodes.h b/regnodes.h index f820c56..8fe0f41 100644 --- a/regnodes.h +++ b/regnodes.h @@ -7,7 +7,7 @@ /* Regops and State definitions */ #define REGNODE_MAX 92 -#define REGMATCH_STATE_MAX 132 +#define REGMATCH_STATE_MAX 134 #define END 0 /* 0000 End of program. */ #define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */ @@ -107,44 +107,46 @@ /* ------------ States ------------- */ #define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */ #define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */ -#define EVAL_AB (REGNODE_MAX + 3) /* state for EVAL */ -#define EVAL_AB_fail (REGNODE_MAX + 4) /* state for EVAL */ -#define CURLYX_end (REGNODE_MAX + 5) /* state for CURLYX */ -#define CURLYX_end_fail (REGNODE_MAX + 6) /* state for CURLYX */ -#define WHILEM_A_pre (REGNODE_MAX + 7) /* state for WHILEM */ -#define WHILEM_A_pre_fail (REGNODE_MAX + 8) /* state for WHILEM */ -#define WHILEM_A_min (REGNODE_MAX + 9) /* state for WHILEM */ -#define WHILEM_A_min_fail (REGNODE_MAX + 10) /* state for WHILEM */ -#define WHILEM_A_max (REGNODE_MAX + 11) /* state for WHILEM */ -#define WHILEM_A_max_fail (REGNODE_MAX + 12) /* state for WHILEM */ -#define WHILEM_B_min (REGNODE_MAX + 13) /* state for WHILEM */ -#define WHILEM_B_min_fail (REGNODE_MAX + 14) /* state for WHILEM */ -#define WHILEM_B_max (REGNODE_MAX + 15) /* state for WHILEM */ -#define WHILEM_B_max_fail (REGNODE_MAX + 16) /* state for WHILEM */ -#define BRANCH_next (REGNODE_MAX + 17) /* state for BRANCH */ -#define BRANCH_next_fail (REGNODE_MAX + 18) /* state for BRANCH */ -#define CURLYM_A (REGNODE_MAX + 19) /* state for CURLYM */ -#define CURLYM_A_fail (REGNODE_MAX + 20) /* state for CURLYM */ -#define CURLYM_B (REGNODE_MAX + 21) /* state for CURLYM */ -#define CURLYM_B_fail (REGNODE_MAX + 22) /* state for CURLYM */ -#define IFMATCH_A (REGNODE_MAX + 23) /* state for IFMATCH */ -#define IFMATCH_A_fail (REGNODE_MAX + 24) /* state for IFMATCH */ -#define CURLY_B_min_known (REGNODE_MAX + 25) /* state for CURLY */ -#define CURLY_B_min_known_fail (REGNODE_MAX + 26) /* state for CURLY */ -#define CURLY_B_min (REGNODE_MAX + 27) /* state for CURLY */ -#define CURLY_B_min_fail (REGNODE_MAX + 28) /* state for CURLY */ -#define CURLY_B_max (REGNODE_MAX + 29) /* state for CURLY */ -#define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */ -#define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */ -#define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */ -#define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */ -#define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */ -#define SKIP_next (REGNODE_MAX + 35) /* state for SKIP */ -#define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */ -#define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */ -#define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */ -#define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */ -#define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */ +#define EVAL_B (REGNODE_MAX + 3) /* state for EVAL */ +#define EVAL_B_fail (REGNODE_MAX + 4) /* state for EVAL */ +#define EVAL_postponed_AB (REGNODE_MAX + 5) /* state for EVAL */ +#define EVAL_postponed_AB_fail (REGNODE_MAX + 6) /* state for EVAL */ +#define CURLYX_end (REGNODE_MAX + 7) /* state for CURLYX */ +#define CURLYX_end_fail (REGNODE_MAX + 8) /* state for CURLYX */ +#define WHILEM_A_pre (REGNODE_MAX + 9) /* state for WHILEM */ +#define WHILEM_A_pre_fail (REGNODE_MAX + 10) /* state for WHILEM */ +#define WHILEM_A_min (REGNODE_MAX + 11) /* state for WHILEM */ +#define WHILEM_A_min_fail (REGNODE_MAX + 12) /* state for WHILEM */ +#define WHILEM_A_max (REGNODE_MAX + 13) /* state for WHILEM */ +#define WHILEM_A_max_fail (REGNODE_MAX + 14) /* state for WHILEM */ +#define WHILEM_B_min (REGNODE_MAX + 15) /* state for WHILEM */ +#define WHILEM_B_min_fail (REGNODE_MAX + 16) /* state for WHILEM */ +#define WHILEM_B_max (REGNODE_MAX + 17) /* state for WHILEM */ +#define WHILEM_B_max_fail (REGNODE_MAX + 18) /* state for WHILEM */ +#define BRANCH_next (REGNODE_MAX + 19) /* state for BRANCH */ +#define BRANCH_next_fail (REGNODE_MAX + 20) /* state for BRANCH */ +#define CURLYM_A (REGNODE_MAX + 21) /* state for CURLYM */ +#define CURLYM_A_fail (REGNODE_MAX + 22) /* state for CURLYM */ +#define CURLYM_B (REGNODE_MAX + 23) /* state for CURLYM */ +#define CURLYM_B_fail (REGNODE_MAX + 24) /* state for CURLYM */ +#define IFMATCH_A (REGNODE_MAX + 25) /* state for IFMATCH */ +#define IFMATCH_A_fail (REGNODE_MAX + 26) /* state for IFMATCH */ +#define CURLY_B_min_known (REGNODE_MAX + 27) /* state for CURLY */ +#define CURLY_B_min_known_fail (REGNODE_MAX + 28) /* state for CURLY */ +#define CURLY_B_min (REGNODE_MAX + 29) /* state for CURLY */ +#define CURLY_B_min_fail (REGNODE_MAX + 30) /* state for CURLY */ +#define CURLY_B_max (REGNODE_MAX + 31) /* state for CURLY */ +#define CURLY_B_max_fail (REGNODE_MAX + 32) /* state for CURLY */ +#define COMMIT_next (REGNODE_MAX + 33) /* state for COMMIT */ +#define COMMIT_next_fail (REGNODE_MAX + 34) /* state for COMMIT */ +#define MARKPOINT_next (REGNODE_MAX + 35) /* state for MARKPOINT */ +#define MARKPOINT_next_fail (REGNODE_MAX + 36) /* state for MARKPOINT */ +#define SKIP_next (REGNODE_MAX + 37) /* state for SKIP */ +#define SKIP_next_fail (REGNODE_MAX + 38) /* state for SKIP */ +#define CUTGROUP_next (REGNODE_MAX + 39) /* state for CUTGROUP */ +#define CUTGROUP_next_fail (REGNODE_MAX + 40) /* state for CUTGROUP */ +#define KEEPS_next (REGNODE_MAX + 41) /* state for KEEPS */ +#define KEEPS_next_fail (REGNODE_MAX + 42) /* state for KEEPS */ /* PL_regkind[] What type of regop or state is this. */ @@ -248,8 +250,10 @@ EXTCONST U8 PL_regkind[] = { /* ------------ States ------------- */ TRIE, /* TRIE_next */ TRIE, /* TRIE_next_fail */ - EVAL, /* EVAL_AB */ - EVAL, /* EVAL_AB_fail */ + EVAL, /* EVAL_B */ + EVAL, /* EVAL_B_fail */ + EVAL, /* EVAL_postponed_AB */ + EVAL, /* EVAL_postponed_AB_fail */ CURLYX, /* CURLYX_end */ CURLYX, /* CURLYX_end_fail */ WHILEM, /* WHILEM_A_pre */ @@ -592,44 +596,46 @@ EXTCONST char * const PL_reg_name[] = { /* ------------ States ------------- */ "TRIE_next", /* REGNODE_MAX +0x01 */ "TRIE_next_fail", /* REGNODE_MAX +0x02 */ - "EVAL_AB", /* REGNODE_MAX +0x03 */ - "EVAL_AB_fail", /* REGNODE_MAX +0x04 */ - "CURLYX_end", /* REGNODE_MAX +0x05 */ - "CURLYX_end_fail", /* REGNODE_MAX +0x06 */ - "WHILEM_A_pre", /* REGNODE_MAX +0x07 */ - "WHILEM_A_pre_fail", /* REGNODE_MAX +0x08 */ - "WHILEM_A_min", /* REGNODE_MAX +0x09 */ - "WHILEM_A_min_fail", /* REGNODE_MAX +0x0a */ - "WHILEM_A_max", /* REGNODE_MAX +0x0b */ - "WHILEM_A_max_fail", /* REGNODE_MAX +0x0c */ - "WHILEM_B_min", /* REGNODE_MAX +0x0d */ - "WHILEM_B_min_fail", /* REGNODE_MAX +0x0e */ - "WHILEM_B_max", /* REGNODE_MAX +0x0f */ - "WHILEM_B_max_fail", /* REGNODE_MAX +0x10 */ - "BRANCH_next", /* REGNODE_MAX +0x11 */ - "BRANCH_next_fail", /* REGNODE_MAX +0x12 */ - "CURLYM_A", /* REGNODE_MAX +0x13 */ - "CURLYM_A_fail", /* REGNODE_MAX +0x14 */ - "CURLYM_B", /* REGNODE_MAX +0x15 */ - "CURLYM_B_fail", /* REGNODE_MAX +0x16 */ - "IFMATCH_A", /* REGNODE_MAX +0x17 */ - "IFMATCH_A_fail", /* REGNODE_MAX +0x18 */ - "CURLY_B_min_known", /* REGNODE_MAX +0x19 */ - "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1a */ - "CURLY_B_min", /* REGNODE_MAX +0x1b */ - "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */ - "CURLY_B_max", /* REGNODE_MAX +0x1d */ - "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */ - "COMMIT_next", /* REGNODE_MAX +0x1f */ - "COMMIT_next_fail", /* REGNODE_MAX +0x20 */ - "MARKPOINT_next", /* REGNODE_MAX +0x21 */ - "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */ - "SKIP_next", /* REGNODE_MAX +0x23 */ - "SKIP_next_fail", /* REGNODE_MAX +0x24 */ - "CUTGROUP_next", /* REGNODE_MAX +0x25 */ - "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */ - "KEEPS_next", /* REGNODE_MAX +0x27 */ - "KEEPS_next_fail", /* REGNODE_MAX +0x28 */ + "EVAL_B", /* REGNODE_MAX +0x03 */ + "EVAL_B_fail", /* REGNODE_MAX +0x04 */ + "EVAL_postponed_AB", /* REGNODE_MAX +0x05 */ + "EVAL_postponed_AB_fail", /* REGNODE_MAX +0x06 */ + "CURLYX_end", /* REGNODE_MAX +0x07 */ + "CURLYX_end_fail", /* REGNODE_MAX +0x08 */ + "WHILEM_A_pre", /* REGNODE_MAX +0x09 */ + "WHILEM_A_pre_fail", /* REGNODE_MAX +0x0a */ + "WHILEM_A_min", /* REGNODE_MAX +0x0b */ + "WHILEM_A_min_fail", /* REGNODE_MAX +0x0c */ + "WHILEM_A_max", /* REGNODE_MAX +0x0d */ + "WHILEM_A_max_fail", /* REGNODE_MAX +0x0e */ + "WHILEM_B_min", /* REGNODE_MAX +0x0f */ + "WHILEM_B_min_fail", /* REGNODE_MAX +0x10 */ + "WHILEM_B_max", /* REGNODE_MAX +0x11 */ + "WHILEM_B_max_fail", /* REGNODE_MAX +0x12 */ + "BRANCH_next", /* REGNODE_MAX +0x13 */ + "BRANCH_next_fail", /* REGNODE_MAX +0x14 */ + "CURLYM_A", /* REGNODE_MAX +0x15 */ + "CURLYM_A_fail", /* REGNODE_MAX +0x16 */ + "CURLYM_B", /* REGNODE_MAX +0x17 */ + "CURLYM_B_fail", /* REGNODE_MAX +0x18 */ + "IFMATCH_A", /* REGNODE_MAX +0x19 */ + "IFMATCH_A_fail", /* REGNODE_MAX +0x1a */ + "CURLY_B_min_known", /* REGNODE_MAX +0x1b */ + "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1c */ + "CURLY_B_min", /* REGNODE_MAX +0x1d */ + "CURLY_B_min_fail", /* REGNODE_MAX +0x1e */ + "CURLY_B_max", /* REGNODE_MAX +0x1f */ + "CURLY_B_max_fail", /* REGNODE_MAX +0x20 */ + "COMMIT_next", /* REGNODE_MAX +0x21 */ + "COMMIT_next_fail", /* REGNODE_MAX +0x22 */ + "MARKPOINT_next", /* REGNODE_MAX +0x23 */ + "MARKPOINT_next_fail", /* REGNODE_MAX +0x24 */ + "SKIP_next", /* REGNODE_MAX +0x25 */ + "SKIP_next_fail", /* REGNODE_MAX +0x26 */ + "CUTGROUP_next", /* REGNODE_MAX +0x27 */ + "CUTGROUP_next_fail", /* REGNODE_MAX +0x28 */ + "KEEPS_next", /* REGNODE_MAX +0x29 */ + "KEEPS_next_fail", /* REGNODE_MAX +0x2a */ }; #endif /* DOINIT */ diff --git a/sv.c b/sv.c index 42e3441..e90ea84 100644 --- a/sv.c +++ b/sv.c @@ -4985,6 +4985,8 @@ Perl_sv_setpvn(pTHX_ SV *const sv, const char *const ptr, const STRLEN len) PERL_ARGS_ASSERT_SV_SETPVN; SV_CHECK_THINKFIRST_COW_DROP(sv); + if (isGV_with_GP(sv)) + Perl_croak_no_modify(); if (!ptr) { (void)SvOK_off(sv); return; @@ -6326,7 +6328,7 @@ C that applies to C. */ void -Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *const little, const STRLEN littlelen, const U32 flags) +Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *little, const STRLEN littlelen, const U32 flags) { char *big; char *mid; @@ -6339,6 +6341,16 @@ Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN l SvPV_force_flags(bigstr, curlen, flags); (void)SvPOK_only_UTF8(bigstr); + + if (little >= SvPVX(bigstr) && + little < SvPVX(bigstr) + (SvLEN(bigstr) ? SvLEN(bigstr) : SvCUR(bigstr))) { + /* little is a pointer to within bigstr, since we can reallocate bigstr, + or little...little+littlelen might overlap offset...offset+len we make a copy + */ + little = savepvn(little, littlelen); + SAVEFREEPV(little); + } + if (offset + len > curlen) { SvGROW(bigstr, offset+len+1); Zero(SvPVX(bigstr)+curlen, offset+len-curlen, char); @@ -9323,7 +9335,14 @@ SV is set to 1. If C is zero, Perl will compute the length using C, (which means if you use this option, that C can't have embedded C characters and has to have a terminating C byte). -For efficiency, consider using C instead. +This function can cause reliability issues if you are likely to pass in +empty strings that are not null terminated, because it will run +strlen on the string and potentially run past valid memory. + +Using L is a safer alternative for non C terminated strings. +For string literals use L instead. This function will work fine for +C terminated strings, but if you want to avoid the if statement on whether +to call C use C instead (calling C yourself). =cut */ @@ -12872,8 +12891,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p Perl_croak_nocontext("panic: quadmath invalid format \"%s\"", ptr); elen = quadmath_snprintf(PL_efloatbuf, PL_efloatsize, qfmt, nv); - if ((IV)elen == -1) + if ((IV)elen == -1) { + if (qfmt != ptr) + SAVEFREEPV(qfmt); Perl_croak_nocontext("panic: quadmath_snprintf failed, format \"%s\"", qfmt); + } if (qfmt != ptr) Safefree(qfmt); } @@ -13192,6 +13214,7 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) parser->sig_elems = proto->sig_elems; parser->sig_optelems= proto->sig_optelems; parser->sig_slurpy = proto->sig_slurpy; + parser->recheck_utf8_validity = proto->recheck_utf8_validity; parser->linestr = sv_dup_inc(proto->linestr, param); { diff --git a/sv.h b/sv.h index 6227d46..82130b7 100644 --- a/sv.h +++ b/sv.h @@ -155,7 +155,10 @@ typedef enum { /* *** any alterations to the SV types above need to be reflected in * SVt_MASK and the various PL_valid_types_* tables. As of this writing those * tables are in perl.h. There are also two affected names tables in dump.c, - * one in B.xs, and 'bodies_by_type[]' in sv.c */ + * one in B.xs, and 'bodies_by_type[]' in sv.c. + * + * The bits that match 0xf0 are CURRENTLY UNUSED, except that 0xFF means a + * freed SV. The bits above that are for flags, like SVf_IOK */ #define SVt_MASK 0xf /* smallest bitmask that covers all types */ diff --git a/t/charset_tools.pl b/t/charset_tools.pl index 0621a7a..6e88a37 100644 --- a/t/charset_tools.pl +++ b/t/charset_tools.pl @@ -139,4 +139,38 @@ sub byte_utf8a_to_utf8n { return $out; } +my @i8_to_native = ( # Only code page 1047 so far. +# _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F +0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, +0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, +0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, +0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAD,0xE0,0xBD,0x5F,0x6D, +0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, +0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, +0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, +0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, +0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x51,0x52,0x53,0x54,0x55,0x56, +0x57,0x58,0x59,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x70,0x71,0x72,0x73, +0x74,0x75,0x76,0x77,0x78,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B,0x9C, +0x9D,0x9E,0x9F,0xA0,0xAA,0xAB,0xAC,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6, +0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, +0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE, +); + +my @native_to_i8; +for (my $i = 0; $i < 256; $i++) { + $native_to_i8[$i8_to_native[$i]] = $i; +} + +*I8_to_native = ($::IS_ASCII) + ? sub { return shift } + : sub { return join "", map { chr $i8_to_native[ord $_] } + split "", shift }; +*native_to_I8 = ($::IS_ASCII) + ? sub { return shift } + : sub { return join "", map { chr $native_to_i8[ord $_] } + split "", shift }; + 1 diff --git a/t/comp/fold.t b/t/comp/fold.t index 4fa0734..a72394e 100644 --- a/t/comp/fold.t +++ b/t/comp/fold.t @@ -4,7 +4,7 @@ # we've not yet verified that use works. # use strict; -print "1..30\n"; +print "1..35\n"; my $test = 0; # Historically constant folding was performed by evaluating the ops, and if @@ -180,3 +180,15 @@ is "@values", "4 4", is $w, 1, '1+undef_constant is not folded outside warninsg scope'; BEGIN { $^W = 1 } } + +$a = eval 'my @z; @z = 0..~0 if 0; 3'; +is ($a, 3, "list constant folding doesn't signal compile-time error"); +is ($@, '', 'no error'); + +$b = 0; +$a = eval 'my @z; @z = 0..~0 if $b; 3'; +is ($a, 3, "list constant folding doesn't signal compile-time error"); +is ($@, '', 'no error'); + +$a = eval 'local $SIG{__WARN__} = sub {}; join("", ":".."~", "z")'; +is ($a, ":z", "aborted list constant folding still executable"); diff --git a/t/comp/parser.t b/t/comp/parser.t index 8be973b..6fd5ad0 100644 --- a/t/comp/parser.t +++ b/t/comp/parser.t @@ -8,7 +8,7 @@ BEGIN { chdir 't' if -d 't'; } -print "1..187\n"; +print "1..188\n"; sub failed { my ($got, $expected, $name) = @_; @@ -598,6 +598,12 @@ is $@, "", 'substr keys assignment'; like $@, qr/Missing right curly or square bracket/, 'RT #130311'; } +# RT #130815: crash in ck_return for malformed code +{ + eval 'm(@{if(0){sub d{]]])}return'; + like $@, qr/^syntax error at \(eval \d+\) line 1, near "\{\]"/, + 'RT #130815: null pointer deref'; +} # Add new tests HERE (above this line) diff --git a/t/lib/common.pl b/t/lib/common.pl index 561e1ff..9c7060f 100644 --- a/t/lib/common.pl +++ b/t/lib/common.pl @@ -31,6 +31,10 @@ if (@ARGV) { glob catfile(curdir(), "lib", $pragma_name, "*"); } +if ($::IS_EBCDIC) { # Skip Latin1 files + @w_files = grep { $_ !~ / _l1 $/x } @w_files +} + my ($tests, @prgs) = setup_multiple_progs(@w_files); $^X = rel2abs($^X); diff --git a/t/lib/croak/toke b/t/lib/croak/toke index f1817b3..4035495 100644 --- a/t/lib/croak/toke +++ b/t/lib/croak/toke @@ -180,9 +180,16 @@ Execution of - aborted due to compilation errors. # NAME Regexp constant overloading when *^H is undefined use overload; BEGIN { overload::constant qr => sub {}; undef *^H } -/a/, m'a' +/a/ EXPECT Constant(qq) unknown at - line 3, within pattern +Execution of - aborted due to compilation errors. +######## +# NAME Regexp constant overloading when *^H is undefined +use overload; +BEGIN { overload::constant qr => sub {}; undef *^H } +m'a' +EXPECT Constant(q) unknown at - line 3, within pattern Execution of - aborted due to compilation errors. ######## @@ -232,9 +239,16 @@ Execution of - aborted due to compilation errors. # NAME Regexp constant overloading returning undef use overload; BEGIN { overload::constant qr => sub {} } -/a/, m'a' +/a/ EXPECT Constant(qq): Call to &{$^H{qr}} did not return a defined value at - line 3, within pattern +Execution of - aborted due to compilation errors. +######## +# NAME Regexp constant overloading returning undef +use overload; +BEGIN { overload::constant qr => sub {} } +m'a' +EXPECT Constant(q): Call to &{$^H{qr}} did not return a defined value at - line 3, within pattern Execution of - aborted due to compilation errors. ######## diff --git a/t/lib/croak/toke_l1 b/t/lib/croak/toke_l1 new file mode 100644 index 0000000..6d656be --- /dev/null +++ b/t/lib/croak/toke_l1 @@ -0,0 +1,22 @@ +# File is encoded in latin-1 so can have malformed-utf8 +__END__ +# NAME [perl #129037] +BEGIN{{};$^H=-1}0à +EXPECT +Malformed UTF-8 character: \xc3\x0a (unexpected non-continuation byte 0x0a, immediately after start byte 0xc3; need 2 bytes, got 1) at - line 1. +Malformed UTF-8 character (fatal) at - line 1. +######## +# NAME [perl #129157] +BEGIN {$^H {q} = sub {pop and-t write gmtime getpwuid @p }; $^H =-6**4,0*215} +"@ust weÃÃÃÃÃÃÃÃÃÃÃtprotobyname"; "9 "Y=n {pop and-p[p };shmr [A + G----C +EXPECT +Malformed UTF-8 character: \xc3\xc3 (unexpected non-continuation byte 0xc3, immediately after start byte 0xc3; need 2 bytes, got 1) at - line 2. +Malformed UTF-8 character (fatal) at - line 2. +######## +# NAME [perl #130675] +use utf8;y'0Á'' +EXPECT +Malformed UTF-8 character: \xc1\x27 (unexpected non-continuation byte 0x27, immediately after start byte 0xc1; need 2 bytes, got 1) at - line 1. +Malformed UTF-8 character: \xc1\x27 (any UTF-8 sequence that starts with "\xc1" is overlong which can and should be represented with a different, shorter sequence) at - line 1. +Malformed UTF-8 character (fatal) at - line 1. diff --git a/t/lib/warnings/op b/t/lib/warnings/op index 46885e2..b128eec 100644 --- a/t/lib/warnings/op +++ b/t/lib/warnings/op @@ -2117,3 +2117,9 @@ do xa for qq(sin); do xa for my $a; do xa for my @a; EXPECT +######## +# TODO [perl #125493 +use warnings; +$_="3.14159"; +tr/0-9/\x{6F0}-\x{6F9}/; +EXPECT diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp index 44ef5c3..2b084c5 100644 --- a/t/lib/warnings/regcomp +++ b/t/lib/warnings/regcomp @@ -31,12 +31,12 @@ BEGIN { use warnings; $a = qr/\c,/; $a = qr/[\c,]/; -no warnings 'syntax', 'deprecated'; +no warnings 'syntax'; $a = qr/\c,/; $a = qr/[\c,]/; EXPECT -"\c," is more clearly written simply as "l". This will be a fatal error in Perl 5.28 at - line 9. -"\c," is more clearly written simply as "l". This will be a fatal error in Perl 5.28 at - line 10. +"\c," is more clearly written simply as "l" at - line 9. +"\c," is more clearly written simply as "l" at - line 10. ######## # This is because currently a different error is output under # use re 'strict', so can't go in reg_mesg.t diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index fe8adc5..fc51d9f 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -1385,12 +1385,12 @@ BEGIN { use warnings; $a = "\c,"; $a = "\c`"; -no warnings 'syntax', 'deprecated'; +no warnings 'syntax'; $a = "\c,"; $a = "\c`"; EXPECT -"\c," is more clearly written simply as "l". This will be a fatal error in Perl 5.28 at - line 9. -"\c`" is more clearly written simply as "\ ". This will be a fatal error in Perl 5.28 at - line 10. +"\c," is more clearly written simply as "l" at - line 9. +"\c`" is more clearly written simply as "\ " at - line 10. ######## # toke.c BEGIN { @@ -1619,4 +1619,35 @@ BEGIN{ use utf8; my $a = qr ̂foobar̂; EXPECT -Use of unassigned code point or non-standalone grapheme for a delimiter will be a fatal error starting in Perl v5.30 at - line 8. +Use of unassigned code point or non-standalone grapheme for a delimiter will be a fatal error starting in Perl 5.30 at - line 8. +######## +# NAME [perl #130567] Assertion failure +BEGIN { + if (ord('A') != 65) { + print "SKIPPED\n# test is ASCII-specific"; + exit 0; + } +} +no warnings "uninitialized"; +$_= ""; +s//\3000/; +s//"\x{180};;s\221(*$@$`\241\275";/gee; +s//"s\221\302\302\302\302\302\302\302$@\241\275";/gee; +EXPECT +######## +# NAME [perl #130666] Assertion failure +no warnings "uninitialized"; +BEGIN{$^H=-1};my $l; s$0[$l] +EXPECT +######## +# NAME [perl #129036] Assertion failure +BEGIN{$0="";$^H=hex join""=>A00000}p? +EXPECT +OPTION fatal +syntax error at - line 1, at EOF +Execution of - aborted due to compilation errors. +######## +# NAME [perl #130655] +use utf8; +qw∘foo ∞ ♥ bar∘ +EXPECT diff --git a/t/op/aassign.t b/t/op/aassign.t index b8025cf..4e7aee7 100644 --- a/t/op/aassign.t +++ b/t/op/aassign.t @@ -272,7 +272,7 @@ sub sh { SKIP: { use Config; # debugging builds will detect this failure and panic - skip "DEBUGGING build" if $::Config{ccflags} =~ /DEBUGGING/ + skip "DEBUGGING build" if $::Config{ccflags} =~ /(? 99; +plan tests => 100; our $TODO; my $deprecated = 0; @@ -801,3 +801,12 @@ TODO: { } EOC } + +sub revnumcmp ($$) { + goto FOO; + die; + FOO: + return $_[1] <=> $_[0]; +} +is eval { join(":", sort revnumcmp (9,5,1,3,7)) }, "9:7:5:3:1", + "can goto at top level of multicalled sub"; diff --git a/t/op/lex.t b/t/op/lex.t index e50f0eb..7a05ee9 100644 --- a/t/op/lex.t +++ b/t/op/lex.t @@ -7,7 +7,7 @@ use warnings; BEGIN { chdir 't' if -d 't'; require './test.pl'; } -plan(tests => 34); +plan(tests => 36); { no warnings 'deprecated'; @@ -273,3 +273,18 @@ SKIP: '[perl #129000] read before buffer' ); } +# probably only failed under ASAN +fresh_perl_is( + "stat\tt\$#0", + <<'EOM', +$# is no longer supported. Its use will be fatal in Perl 5.30 at - line 1. +Number found where operator expected at - line 1, near "$#0" + (Missing operator before 0?) +Can't call method "t" on an undefined value at - line 1. +EOM + {}, + "[perl #129273] heap use after free or overflow" +); + +fresh_perl_like('flock _$', qr/Not enough arguments for flock/, {stderr => 1}, + "[perl #129190] intuit_method() invalidates PL_bufptr"); diff --git a/t/op/multideref.t b/t/op/multideref.t index daf147d..199e523 100644 --- a/t/op/multideref.t +++ b/t/op/multideref.t @@ -18,7 +18,7 @@ BEGIN { use warnings; use strict; -plan 58; +plan 62; # check that strict refs hint is handled @@ -205,3 +205,21 @@ sub defer {} or diag("eval gave: $@"); is($warn, "", "#123609: warn"); } + +# RT #130727 +# a [ah]elem op can be both OPpLVAL_INTRO and OPpDEREF. It may not make +# much sense, but it shouldn't fail an assert. + +{ + my @x; + eval { @{local $x[0][0]} = 1; }; + like $@, qr/Can't use an undefined value as an ARRAY reference/, + "RT #130727 error"; + ok !defined $x[0][0],"RT #130727 array not autovivified"; + + eval { @{1, local $x[0][0]} = 1; }; + like $@, qr/Can't use an undefined value as an ARRAY reference/, + "RT #130727 part 2: error"; + ok !defined $x[0][0],"RT #130727 part 2: array not autovivified"; + +} diff --git a/t/op/ord.t b/t/op/ord.t index deb0880..5776755 100644 --- a/t/op/ord.t +++ b/t/op/ord.t @@ -6,7 +6,7 @@ BEGIN { set_up_inc(qw(. ../lib)); # ../lib needed for test.deparse } -plan tests => 35; +plan tests => 38; # compile time evaluation @@ -66,3 +66,9 @@ is(ord($x), 0x1234, 'runtime ord \x{....}'); is(ord(chr(0x1FFFFF)), 0x1FFFFF, 'last four-byte char in UTF-8'); is(ord(chr(0x200000)), 0x200000, 'first five-byte char in UTF-8'); } + +is(ord(""), 0, "ord of literal empty string"); +is(ord(do { my $x = ""; utf8::downgrade($x); $x }), 0, + "ord of downgraded empty string"); +is(ord(do { my $x = ""; utf8::upgrade($x); $x }), 0, + "ord of upgraded empty string"); diff --git a/t/op/signatures.t b/t/op/signatures.t index 0e53bf0..f0e1b93 100644 --- a/t/op/signatures.t +++ b/t/op/signatures.t @@ -34,125 +34,125 @@ sub t002 () { $a || "z" } is prototype(\&t002), undef; is eval("t002()"), 123; is eval("t002(456)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t002' at \(eval \d+\) line 1\.\n\z/; is eval("t002(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t002' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t003 ( ) { $a || "z" } is prototype(\&t003), undef; is eval("t003()"), 123; is eval("t003(456)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t003' at \(eval \d+\) line 1\.\n\z/; is eval("t003(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t003' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t006 ($a) { $a || "z" } is prototype(\&t006), undef; is eval("t006()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t006' at \(eval \d+\) line 1\.\n\z/; is eval("t006(0)"), "z"; is eval("t006(456)"), 456; is eval("t006(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t006' at \(eval \d+\) line 1\.\n\z/; is eval("t006(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t006' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t007 ($a, $b) { $a.$b } is prototype(\&t007), undef; is eval("t007()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t007' at \(eval \d+\) line 1\.\n\z/; is eval("t007(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t007' at \(eval \d+\) line 1\.\n\z/; is eval("t007(456, 789)"), "456789"; is eval("t007(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t007' at \(eval \d+\) line 1\.\n\z/; is eval("t007(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t007' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t008 ($a, $b, $c) { $a.$b.$c } is prototype(\&t008), undef; is eval("t008()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t008' at \(eval \d+\) line 1\.\n\z/; is eval("t008(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t008' at \(eval \d+\) line 1\.\n\z/; is eval("t008(456, 789)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t008' at \(eval \d+\) line 1\.\n\z/; is eval("t008(456, 789, 987)"), "456789987"; is eval("t008(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t008' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t009 ($abc, $def) { $abc.$def } is prototype(\&t009), undef; is eval("t009()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t009' at \(eval \d+\) line 1\.\n\z/; is eval("t009(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t009' at \(eval \d+\) line 1\.\n\z/; is eval("t009(456, 789)"), "456789"; is eval("t009(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t009' at \(eval \d+\) line 1\.\n\z/; is eval("t009(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t009' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t010 ($a, $) { $a || "z" } is prototype(\&t010), undef; is eval("t010()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t010' at \(eval \d+\) line 1\.\n\z/; is eval("t010(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t010' at \(eval \d+\) line 1\.\n\z/; is eval("t010(0, 789)"), "z"; is eval("t010(456, 789)"), 456; is eval("t010(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t010' at \(eval \d+\) line 1\.\n\z/; is eval("t010(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t010' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t011 ($, $a) { $a || "z" } is prototype(\&t011), undef; is eval("t011()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t011' at \(eval \d+\) line 1\.\n\z/; is eval("t011(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t011' at \(eval \d+\) line 1\.\n\z/; is eval("t011(456, 0)"), "z"; is eval("t011(456, 789)"), 789; is eval("t011(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t011' at \(eval \d+\) line 1\.\n\z/; is eval("t011(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t011' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t012 ($, $) { $a || "z" } is prototype(\&t012), undef; is eval("t012()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t012' at \(eval \d+\) line 1\.\n\z/; is eval("t012(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t012' at \(eval \d+\) line 1\.\n\z/; is eval("t012(0, 789)"), 123; is eval("t012(456, 789)"), 123; is eval("t012(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t012' at \(eval \d+\) line 1\.\n\z/; is eval("t012(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t012' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t013 ($) { $a || "z" } is prototype(\&t013), undef; is eval("t013()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t013' at \(eval \d+\) line 1\.\n\z/; is eval("t013(0)"), 123; is eval("t013(456)"), 123; is eval("t013(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t013' at \(eval \d+\) line 1\.\n\z/; is eval("t013(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t013' at \(eval \d+\) line 1\.\n\z/; is eval("t013(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t013' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t014 ($a = 222) { $a // "z" } @@ -162,9 +162,9 @@ is eval("t014(0)"), 0; is eval("t014(undef)"), "z"; is eval("t014(456)"), 456; is eval("t014(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t014' at \(eval \d+\) line 1\.\n\z/; is eval("t014(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t014' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t015 ($a = undef) { $a // "z" } @@ -174,9 +174,9 @@ is eval("t015(0)"), 0; is eval("t015(undef)"), "z"; is eval("t015(456)"), 456; is eval("t015(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t015' at \(eval \d+\) line 1\.\n\z/; is eval("t015(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t015' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t016 ($a = do { $z++; 222 }) { $a // "z" } @@ -188,9 +188,9 @@ is eval("t016(0)"), 0; is eval("t016(undef)"), "z"; is eval("t016(456)"), 456; is eval("t016(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t016' at \(eval \d+\) line 1\.\n\z/; is eval("t016(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t016' at \(eval \d+\) line 1\.\n\z/; is $z, 1; is eval("t016()"), 222; is $z, 2; @@ -206,9 +206,9 @@ is eval("t017(0)"), 0; is eval("t017(undef)"), "z"; is eval("t017(456)"), 456; is eval("t017(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t017' at \(eval \d+\) line 1\.\n\z/; is eval("t017(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t017' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t019 ($p = 222, $a = 333) { "$p/$a" } @@ -218,7 +218,7 @@ is eval("t019(0)"), "0/333"; is eval("t019(456)"), "456/333"; is eval("t019(456, 789)"), "456/789"; is eval("t019(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t019' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t020 :prototype($) { $_[0]."z" } @@ -229,7 +229,7 @@ is eval("t021(0)"), "0/333"; is eval("t021(456)"), "456/333"; is eval("t021(456, 789)"), "456/789"; is eval("t021(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t021' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t022 ($p = do { $z += 10; 222 }, $a = do { $z++; 333 }) { "$p/$a" } @@ -243,7 +243,7 @@ is eval("t022(456)"), "456/333"; is $z, 13; is eval("t022(456, 789)"), "456/789"; is eval("t022(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t022' at \(eval \d+\) line 1\.\n\z/; is $z, 13; is $a, 123; @@ -252,7 +252,7 @@ is prototype(\&t023), undef; is eval("t023()"), "azy"; is eval("t023(sub { \"x\".\$_[0].\"x\" })"), "xaxy"; is eval("t023(sub { \"x\".\$_[0].\"x\" }, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t023' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t036 ($a = $a."x") { $a."y" } @@ -261,7 +261,7 @@ is eval("t036()"), "123xy"; is eval("t036(0)"), "0y"; is eval("t036(456)"), "456y"; is eval("t036(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t036' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t120 ($a = $_) { $a // "z" } @@ -276,7 +276,7 @@ $_ = "___"; is eval("t120(456)"), 456; $_ = "___"; is eval("t120(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t120' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t121 ($a = caller) { $a // "z" } @@ -286,13 +286,13 @@ is eval("t121(undef)"), "z"; is eval("t121(0)"), 0; is eval("t121(456)"), 456; is eval("t121(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t121' at \(eval \d+\) line 1\.\n\z/; is eval("package T121::Z; ::t121()"), "T121::Z"; is eval("package T121::Z; ::t121(undef)"), "z"; is eval("package T121::Z; ::t121(0)"), 0; is eval("package T121::Z; ::t121(456)"), 456; is eval("package T121::Z; ::t121(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t121' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t129 ($a = return 222) { $a."x" } @@ -301,7 +301,7 @@ is eval("t129()"), "222"; is eval("t129(0)"), "0x"; is eval("t129(456)"), "456x"; is eval("t129(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t129' at \(eval \d+\) line 1\.\n\z/; is $a, 123; use feature "current_sub"; @@ -313,7 +313,7 @@ is eval("t122(1)"), "10"; is eval("t122(5)"), "543210"; is eval("t122(5, 789)"), "5789"; is eval("t122(5, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t122' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t123 ($list = wantarray) { $list ? "list" : "scalar" } @@ -325,7 +325,7 @@ is eval("(t123(0))[0]"), "scalar"; is eval("scalar(t123(1))"), "list"; is eval("(t123(1))[0]"), "list"; is eval("t123(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t123' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t124 ($b = (local $a = $a + 1)) { "$a/$b" } @@ -335,7 +335,7 @@ is $a, 123; is eval("t124(456)"), "123/456"; is $a, 123; is eval("t124(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t124' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t125 ($c = (our $t125_counter)++) { $c } @@ -348,7 +348,7 @@ is eval("t125(789)"), 789; is eval("t125()"), 3; is eval("t125()"), 4; is eval("t125(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t125' at \(eval \d+\) line 1\.\n\z/; is $a, 123; use feature "state"; @@ -364,7 +364,7 @@ is $z, 223; is eval("t126()"), 222; is $z, 223; is eval("t126(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t126' at \(eval \d+\) line 1\.\n\z/; is $z, 223; is $a, 123; @@ -383,7 +383,7 @@ is eval("t127(789)"), 789; is eval("t127()"), 225; is eval("t127()"), 226; is eval("t127(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t127' at \(eval \d+\) line 1\.\n\z/; is $z, 223; is $a, 123; @@ -394,7 +394,7 @@ is eval("t037(0)"), "0/0x"; is eval("t037(456)"), "456/456x"; is eval("t037(456, 789)"), "456/789"; is eval("t037(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t037' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t128 ($a = 222, $b = ($a = 333)) { "$a/$b" } @@ -404,7 +404,7 @@ is eval("t128(0)"), "333/333"; is eval("t128(456)"), "333/333"; is eval("t128(456, 789)"), "456/789"; is eval("t128(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t128' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t130 { join(",", @_).";".scalar(@_) } @@ -415,7 +415,7 @@ is eval("t131(0)"), "0;1"; is eval("t131(456)"), "456;1"; is eval("t131(456, 789)"), "456/789"; is eval("t131(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t131' at \(eval \d+\) line 1\.\n\z/; is $a, 123; eval "#line 8 foo\nsub t024 (\$a =) { }"; @@ -428,11 +428,11 @@ is eval("t025()"), 123; is eval("t025(0)"), 123; is eval("t025(456)"), 123; is eval("t025(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t025' at \(eval \d+\) line 1\.\n\z/; is eval("t025(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t025' at \(eval \d+\) line 1\.\n\z/; is eval("t025(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t025' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t026 ($ = 222) { $a // "z" } @@ -441,11 +441,11 @@ is eval("t026()"), 123; is eval("t026(0)"), 123; is eval("t026(456)"), 123; is eval("t026(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t026' at \(eval \d+\) line 1\.\n\z/; is eval("t026(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t026' at \(eval \d+\) line 1\.\n\z/; is eval("t026(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t026' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t032 ($ = do { $z++; 222 }) { $a // "z" } @@ -456,11 +456,11 @@ is $z, 1; is eval("t032(0)"), 123; is eval("t032(456)"), 123; is eval("t032(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t032' at \(eval \d+\) line 1\.\n\z/; is eval("t032(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t032' at \(eval \d+\) line 1\.\n\z/; is eval("t032(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t032' at \(eval \d+\) line 1\.\n\z/; is $z, 1; is $a, 123; @@ -470,11 +470,11 @@ is eval("t027()"), 123; is eval("t027(0)"), 123; is eval("t027(456)"), 123; is eval("t027(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t027' at \(eval \d+\) line 1\.\n\z/; is eval("t027(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t027' at \(eval \d+\) line 1\.\n\z/; is eval("t027(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t027' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t119 ($ =, $a = 333) { $a // "z" } @@ -484,81 +484,81 @@ is eval("t119(0)"), 333; is eval("t119(456)"), 333; is eval("t119(456, 789)"), 789; is eval("t119(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t119' at \(eval \d+\) line 1\.\n\z/; is eval("t119(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t119' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t028 ($a, $b = 333) { "$a/$b" } is prototype(\&t028), undef; is eval("t028()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t028' at \(eval \d+\) line 1\.\n\z/; is eval("t028(0)"), "0/333"; is eval("t028(456)"), "456/333"; is eval("t028(456, 789)"), "456/789"; is eval("t028(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t028' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t045 ($a, $ = 333) { "$a/" } is prototype(\&t045), undef; is eval("t045()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t045' at \(eval \d+\) line 1\.\n\z/; is eval("t045(0)"), "0/"; is eval("t045(456)"), "456/"; is eval("t045(456, 789)"), "456/"; is eval("t045(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t045' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t046 ($, $b = 333) { "$a/$b" } is prototype(\&t046), undef; is eval("t046()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t046' at \(eval \d+\) line 1\.\n\z/; is eval("t046(0)"), "123/333"; is eval("t046(456)"), "123/333"; is eval("t046(456, 789)"), "123/789"; is eval("t046(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t046' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t047 ($, $ = 333) { "$a/" } is prototype(\&t047), undef; is eval("t047()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t047' at \(eval \d+\) line 1\.\n\z/; is eval("t047(0)"), "123/"; is eval("t047(456)"), "123/"; is eval("t047(456, 789)"), "123/"; is eval("t047(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t047' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t029 ($a, $b, $c = 222, $d = 333) { "$a/$b/$c/$d" } is prototype(\&t029), undef; is eval("t029()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t029' at \(eval \d+\) line 1\.\n\z/; is eval("t029(0)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t029' at \(eval \d+\) line 1\.\n\z/; is eval("t029(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t029' at \(eval \d+\) line 1\.\n\z/; is eval("t029(456, 789)"), "456/789/222/333"; is eval("t029(456, 789, 987)"), "456/789/987/333"; is eval("t029(456, 789, 987, 654)"), "456/789/987/654"; is eval("t029(456, 789, 987, 654, 321)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t029' at \(eval \d+\) line 1\.\n\z/; is eval("t029(456, 789, 987, 654, 321, 111)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t029' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t038 ($a, $b = $a."x") { "$a/$b" } is prototype(\&t038), undef; is eval("t038()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t038' at \(eval \d+\) line 1\.\n\z/; is eval("t038(0)"), "0/0x"; is eval("t038(456)"), "456/456x"; is eval("t038(456, 789)"), "456/789"; is eval("t038(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t038' at \(eval \d+\) line 1\.\n\z/; is $a, 123; eval "#line 8 foo\nsub t030 (\$a = 222, \$b) { }"; @@ -610,15 +610,15 @@ sub t039 (%abc) { join("/", map { $_."=".$abc{$_} } sort keys %abc) } is prototype(\&t039), undef; is eval("t039()"), ""; is eval("t039(0)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t039' at \(eval \d+\) line 1\.\n\z#; is eval("t039(456)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t039' at \(eval \d+\) line 1\.\n\z#; is eval("t039(456, 789)"), "456=789"; is eval("t039(456, 789, 987)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t039' at \(eval \d+\) line 1\.\n\z#; is eval("t039(456, 789, 987, 654)"), "456=789/987=654"; is eval("t039(456, 789, 987, 654, 321)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t039' at \(eval \d+\) line 1\.\n\z#; is eval("t039(456, 789, 987, 654, 321, 111)"), "321=111/456=789/987=654"; is $a, 123; @@ -632,15 +632,15 @@ sub t040 (%) { $a } is prototype(\&t040), undef; is eval("t040()"), 123; is eval("t040(0)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t040' at \(eval \d+\) line 1\.\n\z#; is eval("t040(456)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t040' at \(eval \d+\) line 1\.\n\z#; is eval("t040(456, 789)"), 123; is eval("t040(456, 789, 987)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t040' at \(eval \d+\) line 1\.\n\z#; is eval("t040(456, 789, 987, 654)"), 123; is eval("t040(456, 789, 987, 654, 321)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t040' at \(eval \d+\) line 1\.\n\z#; is eval("t040(456, 789, 987, 654, 321, 111)"), 123; is $a, 123; @@ -653,7 +653,7 @@ is $@, qq{A slurpy parameter may not have a default value at foo line 8, near "= sub t041 ($a, @b) { $a.";".join("/", @b) } is prototype(\&t041), undef; is eval("t041()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t041' at \(eval \d+\) line 1\.\n\z/; is eval("t041(0)"), "0;"; is eval("t041(456)"), "456;"; is eval("t041(456, 789)"), "456;789"; @@ -666,7 +666,7 @@ is $a, 123; sub t042 ($a, @) { $a.";" } is prototype(\&t042), undef; is eval("t042()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t042' at \(eval \d+\) line 1\.\n\z/; is eval("t042(0)"), "0;"; is eval("t042(456)"), "456;"; is eval("t042(456, 789)"), "456;"; @@ -679,7 +679,7 @@ is $a, 123; sub t043 ($, @b) { $a.";".join("/", @b) } is prototype(\&t043), undef; is eval("t043()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t043' at \(eval \d+\) line 1\.\n\z/; is eval("t043(0)"), "123;"; is eval("t043(456)"), "123;"; is eval("t043(456, 789)"), "123;789"; @@ -692,7 +692,7 @@ is $a, 123; sub t044 ($, @) { $a.";" } is prototype(\&t044), undef; is eval("t044()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t044' at \(eval \d+\) line 1\.\n\z/; is eval("t044(0)"), "123;"; is eval("t044(456)"), "123;"; is eval("t044(456, 789)"), "123;"; @@ -705,16 +705,16 @@ is $a, 123; sub t049 ($a, %b) { $a.";".join("/", map { $_."=".$b{$_} } sort keys %b) } is prototype(\&t049), undef; is eval("t049()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t049' at \(eval \d+\) line 1\.\n\z/; is eval("t049(222)"), "222;"; is eval("t049(222, 456)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t049' at \(eval \d+\) line 1\.\n\z#; is eval("t049(222, 456, 789)"), "222;456=789"; is eval("t049(222, 456, 789, 987)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t049' at \(eval \d+\) line 1\.\n\z#; is eval("t049(222, 456, 789, 987, 654)"), "222;456=789/987=654"; is eval("t049(222, 456, 789, 987, 654, 321)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t049' at \(eval \d+\) line 1\.\n\z#; is eval("t049(222, 456, 789, 987, 654, 321, 111)"), "222;321=111/456=789/987=654"; is $a, 123; @@ -722,11 +722,11 @@ is $a, 123; sub t051 ($a, $b, $c, @d) { "$a;$b;$c;".join("/", @d).";".scalar(@d) } is prototype(\&t051), undef; is eval("t051()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t051' at \(eval \d+\) line 1\.\n\z/; is eval("t051(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t051' at \(eval \d+\) line 1\.\n\z/; is eval("t051(456, 789)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t051' at \(eval \d+\) line 1\.\n\z/; is eval("t051(456, 789, 987)"), "456;789;987;;0"; is eval("t051(456, 789, 987, 654)"), "456;789;987;654;1"; is eval("t051(456, 789, 987, 654, 321)"), "456;789;987;654/321;2"; @@ -736,18 +736,18 @@ is $a, 123; sub t052 ($a, $b, %c) { "$a;$b;".join("/", map { $_."=".$c{$_} } sort keys %c) } is prototype(\&t052), undef; is eval("t052()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t052' at \(eval \d+\) line 1\.\n\z/; is eval("t052(222)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t052' at \(eval \d+\) line 1\.\n\z/; is eval("t052(222, 333)"), "222;333;"; is eval("t052(222, 333, 456)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t052' at \(eval \d+\) line 1\.\n\z#; is eval("t052(222, 333, 456, 789)"), "222;333;456=789"; is eval("t052(222, 333, 456, 789, 987)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t052' at \(eval \d+\) line 1\.\n\z#; is eval("t052(222, 333, 456, 789, 987, 654)"), "222;333;456=789/987=654"; is eval("t052(222, 333, 456, 789, 987, 654, 321)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t052' at \(eval \d+\) line 1\.\n\z#; is eval("t052(222, 333, 456, 789, 987, 654, 321, 111)"), "222;333;321=111/456=789/987=654"; is $a, 123; @@ -757,21 +757,21 @@ sub t053 ($a, $b, $c, %d) { } is prototype(\&t053), undef; is eval("t053()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t053' at \(eval \d+\) line 1\.\n\z/; is eval("t053(222)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t053' at \(eval \d+\) line 1\.\n\z/; is eval("t053(222, 333)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t053' at \(eval \d+\) line 1\.\n\z/; is eval("t053(222, 333, 444)"), "222;333;444;"; is eval("t053(222, 333, 444, 456)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t053' at \(eval \d+\) line 1\.\n\z#; is eval("t053(222, 333, 444, 456, 789)"), "222;333;444;456=789"; is eval("t053(222, 333, 444, 456, 789, 987)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t053' at \(eval \d+\) line 1\.\n\z#; is eval("t053(222, 333, 444, 456, 789, 987, 654)"), "222;333;444;456=789/987=654"; is eval("t053(222, 333, 444, 456, 789, 987, 654, 321)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t053' at \(eval \d+\) line 1\.\n\z#; is eval("t053(222, 333, 444, 456, 789, 987, 654, 321, 111)"), "222;333;444;321=111/456=789/987=654"; is $a, 123; @@ -817,13 +817,13 @@ is prototype(\&t050), undef; is eval("t050()"), "211;"; is eval("t050(222)"), "222;"; is eval("t050(222, 456)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t050' at \(eval \d+\) line 1\.\n\z#; is eval("t050(222, 456, 789)"), "222;456=789"; is eval("t050(222, 456, 789, 987)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t050' at \(eval \d+\) line 1\.\n\z#; is eval("t050(222, 456, 789, 987, 654)"), "222;456=789/987=654"; is eval("t050(222, 456, 789, 987, 654, 321)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t050' at \(eval \d+\) line 1\.\n\z#; is eval("t050(222, 456, 789, 987, 654, 321, 111)"), "222;321=111/456=789/987=654"; is $a, 123; @@ -836,13 +836,13 @@ is eval("t056()"), "211;311;"; is eval("t056(222)"), "222;311;"; is eval("t056(222, 333)"), "222;333;"; is eval("t056(222, 333, 456)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t056' at \(eval \d+\) line 1\.\n\z#; is eval("t056(222, 333, 456, 789)"), "222;333;456=789"; is eval("t056(222, 333, 456, 789, 987)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t056' at \(eval \d+\) line 1\.\n\z#; is eval("t056(222, 333, 456, 789, 987, 654)"), "222;333;456=789/987=654"; is eval("t056(222, 333, 456, 789, 987, 654, 321)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t056' at \(eval \d+\) line 1\.\n\z#; is eval("t056(222, 333, 456, 789, 987, 654, 321, 111)"), "222;333;321=111/456=789/987=654"; is $a, 123; @@ -856,14 +856,14 @@ is eval("t057(222)"), "222;311;411;"; is eval("t057(222, 333)"), "222;333;411;"; is eval("t057(222, 333, 444)"), "222;333;444;"; is eval("t057(222, 333, 444, 456)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t057' at \(eval \d+\) line 1\.\n\z#; is eval("t057(222, 333, 444, 456, 789)"), "222;333;444;456=789"; is eval("t057(222, 333, 444, 456, 789, 987)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t057' at \(eval \d+\) line 1\.\n\z#; is eval("t057(222, 333, 444, 456, 789, 987, 654)"), "222;333;444;456=789/987=654"; is eval("t057(222, 333, 444, 456, 789, 987, 654, 321)"), undef; -like $@, qr#\AOdd name/value argument for subroutine at \(eval \d+\) line 1\.\n\z#; +like $@, qr#\AOdd name/value argument for subroutine 'main::t057' at \(eval \d+\) line 1\.\n\z#; is eval("t057(222, 333, 444, 456, 789, 987, 654, 321, 111)"), "222;333;444;321=111/456=789/987=654"; is $a, 123; @@ -871,7 +871,7 @@ is $a, 123; sub t058 ($a, $b = 333, @c) { "$a;$b;".join("/", @c).";".scalar(@c) } is prototype(\&t058), undef; is eval("t058()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t058' at \(eval \d+\) line 1\.\n\z/; is eval("t058(456)"), "456;333;;0"; is eval("t058(456, 789)"), "456;789;;0"; is eval("t058(456, 789, 987)"), "456;789;987;1"; @@ -949,27 +949,27 @@ EOF sub t080 ($a,,, $b) { $a.$b } is prototype(\&t080), undef; is eval("t080()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t080' at \(eval \d+\) line 1\.\n\z/; is eval("t080(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t080' at \(eval \d+\) line 1\.\n\z/; is eval("t080(456, 789)"), "456789"; is eval("t080(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t080' at \(eval \d+\) line 1\.\n\z/; is eval("t080(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t080' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t081 ($a, $b,,) { $a.$b } is prototype(\&t081), undef; is eval("t081()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t081' at \(eval \d+\) line 1\.\n\z/; is eval("t081(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t081' at \(eval \d+\) line 1\.\n\z/; is eval("t081(456, 789)"), "456789"; is eval("t081(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t081' at \(eval \d+\) line 1\.\n\z/; is eval("t081(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t081' at \(eval \d+\) line 1\.\n\z/; is $a, 123; eval "#line 8 foo\nsub t082 (, \$a) { }"; @@ -981,14 +981,14 @@ is $@, qq{syntax error at foo line 8, near "(,"\n}; sub t084($a,$b){ $a.$b } is prototype(\&t084), undef; is eval("t084()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t084' at \(eval \d+\) line 1\.\n\z/; is eval("t084(456)"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t084' at \(eval \d+\) line 1\.\n\z/; is eval("t084(456, 789)"), "456789"; is eval("t084(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t084' at \(eval \d+\) line 1\.\n\z/; is eval("t084(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t084' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t085 @@ -1007,13 +1007,13 @@ sub t085 { $a.$b } is prototype(\&t085), undef; is eval("t085()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t085' at \(eval \d+\) line 1\.\n\z/; is eval("t085(456)"), "456333"; is eval("t085(456, 789)"), "456789"; is eval("t085(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t085' at \(eval \d+\) line 1\.\n\z/; is eval("t085(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t085' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t086 @@ -1032,13 +1032,13 @@ sub t086 { $a.$b } is prototype(\&t086), undef; is eval("t086()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t086' at \(eval \d+\) line 1\.\n\z/; is eval("t086(456)"), "456333"; is eval("t086(456, 789)"), "456789"; is eval("t086(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t086' at \(eval \d+\) line 1\.\n\z/; is eval("t086(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t086' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t087 @@ -1057,13 +1057,13 @@ sub t087 { $a.$b } is prototype(\&t087), undef; is eval("t087()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t087' at \(eval \d+\) line 1\.\n\z/; is eval("t087(456)"), "456333"; is eval("t087(456, 789)"), "456789"; is eval("t087(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t087' at \(eval \d+\) line 1\.\n\z/; is eval("t087(456, 789, 987, 654)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t087' at \(eval \d+\) line 1\.\n\z/; is $a, 123; eval "#line 8 foo\nsub t088 (\$ #foo\na) { }"; @@ -1123,25 +1123,25 @@ like $@, qr/\ACan't use global \%_ in "my" at foo line 8/; my $t103 = sub ($a) { $a || "z" }; is prototype($t103), undef; is eval("\$t103->()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::__ANON__' at \(eval \d+\) line 1\.\n\z/; is eval("\$t103->(0)"), "z"; is eval("\$t103->(456)"), 456; is eval("\$t103->(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::__ANON__' at \(eval \d+\) line 1\.\n\z/; is eval("\$t103->(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::__ANON__' at \(eval \d+\) line 1\.\n\z/; is $a, 123; my $t118 = sub ($a) :prototype($) { $a || "z" }; is prototype($t118), "\$"; is eval("\$t118->()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::__ANON__' at \(eval \d+\) line 1\.\n\z/; is eval("\$t118->(0)"), "z"; is eval("\$t118->(456)"), 456; is eval("\$t118->(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::__ANON__' at \(eval \d+\) line 1\.\n\z/; is eval("\$t118->(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::__ANON__' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t033 ($a = sub ($a) { $a."z" }) { $a->("a")."y" } @@ -1149,7 +1149,7 @@ is prototype(\&t033), undef; is eval("t033()"), "azy"; is eval("t033(sub { \"x\".\$_[0].\"x\" })"), "xaxy"; is eval("t033(sub { \"x\".\$_[0].\"x\" }, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t033' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t133 ($a = sub ($a = 222) { $a."z" }) { $a->()."/".$a->("a") } @@ -1157,7 +1157,7 @@ is prototype(\&t133), undef; is eval("t133()"), "222z/az"; is eval("t133(sub { \"x\".(\$_[0] // \"u\").\"x\" })"), "xux/xax"; is eval("t133(sub { \"x\".(\$_[0] // \"u\").\"x\" }, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t133' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t134 ($a = sub ($a, $t = sub { $_[0]."p" }) { $t->($a)."z" }) { @@ -1169,7 +1169,7 @@ is eval("t134(sub { \"x\".(\$_[1] // sub{\$_[0]})->(\$_[0]).\"x\" })"), "xax/xbqx"; is eval("t134(sub { \"x\".(\$_[1] // sub{\$_[0]})->(\$_[0]).\"x\" }, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t134' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t135 ($a = sub ($a, $t = sub ($p) { $p."p" }) { $t->($a)."z" }) { @@ -1181,7 +1181,7 @@ is eval("t135(sub { \"x\".(\$_[1] // sub{\$_[0]})->(\$_[0]).\"x\" })"), "xax/xbqx"; is eval("t135(sub { \"x\".(\$_[1] // sub{\$_[0]})->(\$_[0]).\"x\" }, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t135' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t132 ( @@ -1195,19 +1195,19 @@ is eval("t132(sub { \"x\".(\$_[1] // sub{\$_[0]})->(\$_[0]).\"x\" })"), "xax/xbqx"; is eval("t132(sub { \"x\".(\$_[1] // sub{\$_[0]})->(\$_[0]).\"x\" }, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t132' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t104($a) :method { $a || "z" } is prototype(\&t104), undef; is eval("t104()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t104' at \(eval \d+\) line 1\.\n\z/; is eval("t104(0)"), "z"; is eval("t104(456)"), 456; is eval("t104(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t104' at \(eval \d+\) line 1\.\n\z/; is eval("t104(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t104' at \(eval \d+\) line 1\.\n\z/; is $a, 123; sub t105($a) :prototype($) { $a || "z" } @@ -1225,13 +1225,13 @@ is $a, 123; sub t106($a) :prototype(@) { $a || "z" } is prototype(\&t106), "\@"; is eval("t106()"), undef; -like $@, qr/\AToo few arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo few arguments for subroutine 'main::t106' at \(eval \d+\) line 1\.\n\z/; is eval("t106(0)"), "z"; is eval("t106(456)"), 456; is eval("t106(456, 789)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t106' at \(eval \d+\) line 1\.\n\z/; is eval("t106(456, 789, 987)"), undef; -like $@, qr/\AToo many arguments for subroutine at \(eval \d+\) line 1\.\n\z/; +like $@, qr/\AToo many arguments for subroutine 'main::t106' at \(eval \d+\) line 1\.\n\z/; is $a, 123; eval "#line 8 foo\nsub t107 :method (\$a) { }"; @@ -1463,6 +1463,37 @@ is scalar(t145()), undef; "masking warning"; } +# Reporting subroutine names + +package T200 { + sub foo ($x) {} + *t201 = sub ($x) {} +} +*t202 = sub ($x) {}; +my $t203 = sub ($x) {}; +*t204 = *T200::foo; +*t205 = \&T200::foo; + +eval { T200::foo() }; +like($@, qr/^Too few arguments for subroutine 'T200::foo'/); +eval { T200::t201() }; +like($@, qr/^Too few arguments for subroutine 'T200::__ANON__'/); +eval { t202() }; +like($@, qr/^Too few arguments for subroutine 'main::__ANON__'/); +eval { $t203->() }; +like($@, qr/^Too few arguments for subroutine 'main::__ANON__'/); +eval { t204() }; +like($@, qr/^Too few arguments for subroutine 'T200::foo'/); +eval { t205() }; +like($@, qr/^Too few arguments for subroutine 'T200::foo'/); + + +# RT #130661 a char >= 0x80 in a signature when a sigil was expected +# was triggering an assertion + +eval "sub (\x80"; +like $@, qr/A signature parameter must start with/, "RT #130661"; + use File::Spec::Functions; diff --git a/t/op/smartmatch.t b/t/op/smartmatch.t index ca019fd..10d3539 100644 --- a/t/op/smartmatch.t +++ b/t/op/smartmatch.t @@ -76,7 +76,7 @@ my %keyandmore = map { $_ => 0 } @keyandmore; my %fooormore = map { $_ => 0 } @fooormore; # Load and run the tests -plan tests => 349+2; +plan tests => 349+4; while () { SKIP: { @@ -182,6 +182,25 @@ sub NOT_DEF() { undef } } +{ + # [perl #130705] + # Perl_ck_smartmatch would turn the match in: + # 0 =~ qr/1/ ~~ 0 # parsed as (0 =~ qr/1/) ~~ 0 + # into a qr, leaving the initial 0 on the stack after execution + # + # Similarly for: 0 ~~ (0 =~ qr/1/) + # + # Either caused an assertion failure in the context of warn (or print) + # if there was some other operator's arguments left on the stack, as with + # the test cases. + fresh_perl_is('print(0->[0 =~ qr/1/ ~~ 0])', '', + { switches => [ "-M-warnings=experimental::smartmatch" ] }, + "don't qr-ify left-side match against a stacked argument"); + fresh_perl_is('print(0->[0 ~~ (0 =~ qr/1/)])', '', + { switches => [ "-M-warnings=experimental::smartmatch" ] }, + "don't qr-ify right-side match against a stacked argument"); +} + # Prefix character : # - expected to match # ! - expected to not match diff --git a/t/op/stash.t b/t/op/stash.t index 8d2d628..c9634a3 100644 --- a/t/op/stash.t +++ b/t/op/stash.t @@ -6,7 +6,7 @@ BEGIN { set_up_inc( qw(../lib) ); } -plan( tests => 54 ); +plan( tests => 55 ); # Used to segfault (bug #15479) fresh_perl_like( @@ -349,3 +349,10 @@ is runperl( ), "ok\n", "[perl #128238] non-stashes in stashes"; + +is runperl( + prog => '%:: = (); print *{q|::|}, qq|\n|', + stderr => 1, + ), + "*main::main::\n", + "[perl #129869] lookup %:: by name after clearing %::"; diff --git a/t/op/substr.t b/t/op/substr.t index 83e7bae..a8abed8 100644 --- a/t/op/substr.t +++ b/t/op/substr.t @@ -22,7 +22,7 @@ $SIG{__WARN__} = sub { } }; -plan(390); +plan(391); run_tests() unless caller; @@ -877,3 +877,6 @@ is($destroyed, 1, 'Timely scalar destruction with lvalue substr'); is($result_3363, "best", "ref-to-substr retains lvalue-ness under recursion [perl #3363]"); } + +# failed with ASAN +fresh_perl_is('$0 = "/usr/bin/perl"; substr($0, 0, 0, $0)', '', {}, "(perl #129340) substr() with source in target"); diff --git a/t/op/svleak.t b/t/op/svleak.t index 77ff9ae..89fa63f 100644 --- a/t/op/svleak.t +++ b/t/op/svleak.t @@ -15,7 +15,7 @@ BEGIN { use Config; -plan tests => 138; +plan tests => 140; # run some code N times. If the number of SVs at the end of loop N is # greater than (N-1)*delta at the end of loop 1, we've got a leak @@ -560,3 +560,26 @@ EOF sub f { $a =~ /[^.]+$b/; } ::leak(2, 0, \&f, q{use re 'strict' shouldn't leak warning strings}); } + +# check that B::RHE->HASH does not leak +{ + package BHINT; + sub foo {} + require B; + my $op = B::svref_2object(\&foo)->ROOT->first; + sub lk { { my $d = $op->hints_hash->HASH } } + ::leak(3, 0, \&lk, q!B::RHE->HASH shoudln't leak!); +} + + +# dying while compiling a regex with codeblocks imported from an embedded +# qr// could leak + +{ + my sub codeblocks { + my $r = qr/(?{ 1; })/; + my $c = '(?{ 2; })'; + eval { /$r$c/ } + } + ::leak(2, 0, \&codeblocks, q{leaking embedded qr codeblocks}); +} diff --git a/t/op/tie.t b/t/op/tie.t index e5e7d30..12fc935 100644 --- a/t/op/tie.t +++ b/t/op/tie.t @@ -942,6 +942,24 @@ tie $foo, undef; EXPECT Can't locate object method "TIESCALAR" via package "main" at - line 2. ######## +# tie into nonexistent glob [RT#130623 assertion failure] +tie $foo, *FOO; +EXPECT +Can't locate object method "TIESCALAR" via package "FOO" at - line 2. +######## +# tie into glob when package exists but not method: no "*", no "main::" +{ package PackageWithoutTIESCALAR } +tie $foo, *PackageWithoutTIESCALAR; +EXPECT +Can't locate object method "TIESCALAR" via package "PackageWithoutTIESCALAR" at - line 3. +######## +# tie into reference [RT#130623 assertion failure] +eval { tie $foo, \"nope" }; +my $exn = $@ // ""; +print $exn =~ s/0x\w+/0xNNN/rg; +EXPECT +Can't locate object method "TIESCALAR" via package "SCALAR(0xNNN)" at - line 2. +######## # # STORE freeing tie'd AV sub TIEARRAY { bless [] } diff --git a/t/op/tr.t b/t/op/tr.t index 25c397d..323a5c3 100644 --- a/t/op/tr.t +++ b/t/op/tr.t @@ -13,7 +13,7 @@ BEGIN { use utf8; -plan tests => 215; +plan tests => 216; # Test this first before we extend the stack with other operations. # This caused an asan failure due to a bad write past the end of the stack. @@ -702,5 +702,15 @@ for ("", nullrocow) { } +{ # [perl #130656] This bug happens when the tr is split across lines, so + # that the first line causes it to go into UTF-8, and the 2nd is only + # things like \x + my $x = "\x{E235}"; + $x =~ tr + [\x{E234}-\x{E342}\x{E5B5}-\x{E5DF}] + [\x{E5CD}-\x{E5DF}\x{EA80}-\x{EAFA}\x{EB0E}-\x{EB8E}\x{EAFB}-\x{EB0D}\x{E5B5}-\x{E5CC}]; + + is $x, "\x{E5CE}", '[perl #130656]'; +} 1; diff --git a/t/op/write.t b/t/op/write.t index 3172681..d528a8e 100644 --- a/t/op/write.t +++ b/t/op/write.t @@ -98,7 +98,7 @@ for my $tref ( @NumTests ){ my $bas_tests = 21; # number of tests in section 3 -my $bug_tests = 66 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 6 + 2 + 3 + 96 + 11 + 4; +my $bug_tests = 66 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 6 + 2 + 3 + 96 + 11 + 15; # number of tests in section 4 my $hmb_tests = 37; @@ -1562,6 +1562,35 @@ ok defined *{$::{CmT}}{FORMAT}, "glob assign"; formline $format, $orig, 12345; is $^A, ("x" x 100) . " 12345\n", "\@* doesn't overflow"; + # ...nor this (RT #130703). + # Under 'use bytes', the two bytes (c2, 80) making up each \x80 char + # each get expanded to two bytes (so four in total per \x80 char); the + # buffer growth wasn't accounting for this doubling in size + + { + local $^A = ''; + my $format = "X\n\x{100}" . ("\x80" x 200); + my $expected = $format; + utf8::encode($expected); + use bytes; + formline($format); + is $^A, $expected, "RT #130703"; + } + + # further buffer overflows with RT #130703 + + { + local $^A = ''; + my $n = 200; + my $long = 'x' x 300; + my $numf = ('@###' x $n); + my $expected = $long . "\n" . (" 1" x $n); + formline("@*\n$numf", $long, ('1') x $n); + + is $^A, $expected, "RT #130703 part 2"; + } + + # make sure it can cope with formats > 64k $format = 'x' x 65537; @@ -2001,6 +2030,29 @@ EOP { stderr => 1 }, '#128255 Assert fail in S_sublex_done'); +{ + $^A = ""; + my $a = *globcopy; + my $r = eval { formline "^<<", $a }; + is $@, ""; + ok $r, "^ format with glob copy"; + is $^A, "*ma", "^ format with glob copy"; + is $a, "in::globcopy", "^ format with glob copy"; +} + +{ + $^A = ""; + my $r = eval { formline "^<<", *realglob }; + like $@, qr/\AModification of a read-only value attempted /; + is $r, undef, "^ format with real glob"; + is $^A, "*ma", "^ format with real glob"; + is ref(\*realglob), "GLOB"; +} + +$^A = ""; + +# [perl #130722] assertion failure +fresh_perl_is('for(1..2){formline*0}', '', { stderr => 1 } , "#130722 - assertion failure"); ############################# ## Section 4 diff --git a/t/perf/benchmarks b/t/perf/benchmarks index 233f1fb..dec29bf 100644 --- a/t/perf/benchmarks +++ b/t/perf/benchmarks @@ -1377,4 +1377,15 @@ setup => '$_ = ("0" x 100) . ("a" x 100);', code => '/[acgt]+/', }, + + 'regex::whilem::min_captures_fail' => { + desc => '/WHILEM with anon-greedy match and captures that fails', + setup => '$_ = ("a" x 20)', + code => '/^(?:(.)(.))*?[XY]/', + }, + 'regex::whilem::max_captures_fail' => { + desc => '/WHILEM with a greedy match and captures that fails', + setup => '$_ = ("a" x 20)', + code => '/^(?:(.)(.))*[XY]/', + }, ]; diff --git a/t/perf/taint.t b/t/perf/taint.t index 0c3ac82..797f0ad 100644 --- a/t/perf/taint.t +++ b/t/perf/taint.t @@ -28,16 +28,34 @@ use Scalar::Util qw(tainted); $| = 1; -plan tests => 2; +plan tests => 4; watchdog(60); +my $taint = substr($ENV{PATH}, 0, 0); # and empty tainted string + { - my $in = substr($ENV{PATH}, 0, 0) . ( "ab" x 200_000 ); + my $in = $taint . ( "ab" x 200_000 ); utf8::upgrade($in); ok(tainted($in), "performance issue only when tainted"); while ($in =~ /\Ga+b/g) { } pass("\\G on tainted string"); } +# RT #130584 +# tainted string caused the utf8 pos cache to be cleared each time + +{ + my $repeat = 30_000; + my $in = $taint . ("abcdefghijklmnopqrstuvwxyz" x $repeat); + utf8::upgrade($in); + ok(tainted($in), "performance issue only when tainted"); + local ${^UTF8CACHE} = 1; # defeat debugging + for my $i (1..$repeat) { + $in =~ /abcdefghijklmnopqrstuvwxyz/g or die; + my $p = pos($in); # this was slow + } + pass("RT #130584 pos on tainted utf8 string"); +} + 1; diff --git a/t/porting/customized.dat b/t/porting/customized.dat index 24df433..c73daf2 100644 --- a/t/porting/customized.dat +++ b/t/porting/customized.dat @@ -1,6 +1,3 @@ -CPAN cpan/CPAN/lib/App/Cpan.pm 3cef68c2a44a4996b432bc25622e3a544a188aa5 -CPAN cpan/CPAN/lib/CPAN.pm 4616a44963045f7bd07bb7f8e5f99bbd789af4e5 -CPAN cpan/CPAN/scripts/cpan 22610ed0301d48a269d1739afd2f7f84359d956f Digest cpan/Digest/Digest.pm 43f7f544cb11842b2f55c73e28930da50774e081 Encode cpan/Encode/Unicode/Unicode.pm 9749692c67f7d69083034de9184a93f070ab4799 ExtUtils::Constant cpan/ExtUtils-Constant/t/Constant.t a0369c919e216fb02767a637666bb4577ad79b02 @@ -44,7 +41,7 @@ Math::Complex cpan/Math-Complex/t/Trig.t 2682526e23a161d54732c2a66393fe4a234d186 Memoize cpan/Memoize/Memoize.pm 902092ff91cdec9c7b4bd06202eb179e1ce26ca2 Net::Ping dist/Net-Ping/t/000_load.t deff5dc2ca54dae28cb19d3631427db127279ac2 Net::Ping dist/Net-Ping/t/001_new.t 90c9d63509b3efc8941449fbd1ca8b807fa42040 -Net::Ping dist/Net-Ping/t/010_pingecho.t 2e7340ee0e9f6119b889016fc8b89e6bcd4a8fe2 +Net::Ping dist/Net-Ping/t/010_pingecho.t fd91db2daf78a994bd0210ab32cca2a46dff4f44 Net::Ping dist/Net-Ping/t/500_ping_icmp.t a003daa5eaf215e58234786bb1fbfbebf669bf44 Pod::Checker cpan/Pod-Checker/t/pod/contains_bad_pod.xr 73538fd80dfe6e19ad561fe034009b44460208f6 Pod::Checker cpan/Pod-Checker/t/pod/selfcheck.t 8ce3cfd38e4b9bcf5bc7fe7f2a14195e49aed7d8 @@ -55,6 +52,7 @@ Scalar-List-Utils cpan/Scalar-List-Utils/lib/List/Util/XS.pm 2d09b84a5575e678346 Scalar-List-Utils cpan/Scalar-List-Utils/lib/Scalar/Util.pm 98af8fe390d9c9fa11a1fafebbc68ea663cdce2c Scalar-List-Utils cpan/Scalar-List-Utils/lib/Sub/Util.pm 188cf0dd95cd7ba60fd6a366f440811fb52c2c79 Scalar-List-Utils cpan/Scalar-List-Utils/t/lln.t b7148c7f1cd9c70fd21d8153542b6d3de2b655ad +Scalar-List-Utils cpan/Scalar-List-Utils/t/tainted.t 9c52e04687ec8a7d23a1c38a762723858305b1fe Scalar-List-Utils cpan/Scalar-List-Utils/t/uniq.t 5de01094d2bca9cf8b2bbbca920122f7396bf11e Socket cpan/Socket/Socket.pm 98e38176d745c38282907f391c077298f5a3d0ba Socket cpan/Socket/Socket.xs edd4fed212785f11c5c2095a75941dad27d586d9 diff --git a/t/re/fold_grind.t b/t/re/fold_grind.t index bf5c9fd..0665517 100644 --- a/t/re/fold_grind.t +++ b/t/re/fold_grind.t @@ -11,7 +11,10 @@ BEGIN { if ($^O eq 'dec_osf') { skip_all("$^O cannot handle this test"); } - watchdog(5 * 60); + my $time_out_factor = $ENV{PERL_TEST_TIME_OUT_FACTOR} || 1; + $time_out_factor = 1 if $time_out_factor < 1; + + watchdog(5 * 60 * $time_out_factor); require './loc_tools.pl'; } diff --git a/t/re/pat.t b/t/re/pat.t index d5e5d2f..16bfc8e 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -23,7 +23,7 @@ BEGIN { skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader; skip_all_without_unicode_tables(); -plan tests => 835; # Update this when adding/deleting tests. +plan tests => 837; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1901,6 +1901,20 @@ EOP }msx, { stderr => 1 }, "Offsets in debug output are not negative"); } } + { + # buffer overflow + fresh_perl_is("BEGIN{\$^H=0x200000}\ns/[(?{//xx", + "Unmatched [ in regex; marked by <-- HERE in m/[ <-- HERE (?{/ at (eval 1) line 1.\n", + {}, "buffer overflow for regexp component"); + } + { + # [perl #129281] buffer write overflow, detected by ASAN, valgrind + fresh_perl_is('/0(?0)|^*0(?0)|^*(^*())0|/', '', {}, "don't bump whilem_c too much"); + } } # End of sub run_tests 1; + +# +# ex: set ts=8 sts=4 sw=4 et: +# diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t index e59b059..6921d38 100644 --- a/t/re/pat_re_eval.t +++ b/t/re/pat_re_eval.t @@ -22,7 +22,7 @@ BEGIN { } -plan tests => 527; # Update this when adding/deleting tests. +plan tests => 533; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1232,6 +1232,66 @@ sub run_tests { 'padtmp swiping does not affect "$a$b" =~ /(??{})/' } + { + # [perl #129140] + # this used to cause a double-free of the code_block struct + # when re-running the compilation after spotting utf8. + # This test doesn't catch it, but might panic, or fail under + # valgrind etc + + my $s = ''; + /$s(?{})\x{100}/ for '', ''; + pass "RT #129140"; + } + + # RT #130650 code blocks could get double-freed during a pattern + # compilation croak + + { + # this used to panic or give ASAN errors + eval 'qr/(?{})\6/'; + like $@, qr/Reference to nonexistent group/, "RT #130650"; + } + + # RT #129881 + # on exit from a pattern with multiple code blocks from different + # CVs, PL_comppad wasn't being restored correctly + + sub { + # give first few pad slots known values + my ($x1, $x2, $x3, $x4, $x5) = 101..105; + # these vars are in a separate pad + my $r = qr/((?{my ($y1, $y2) = 201..202; 1;})A){2}X/; + # the first alt fails, causing a switch to this anon + # sub's pad + "AAA" =~ /$r|(?{my ($z1, $z2) = 301..302; 1;})A/; + is $x1, 101, "RT #129881: x1"; + is $x2, 102, "RT #129881: x2"; + is $x3, 103, "RT #129881: x3"; + }->(); + + + # RT #126697 + # savestack wasn't always being unwound on EVAL failure + { + local our $i = 0; + my $max = 0; + + 'ABC' =~ m{ + \A + (?: + (?: AB | A | BC ) + (?{ + local $i = $i + 1; + $max = $i if $max < $i; + }) + )* + \z + }x; + is $max, 2, "RT #126697"; + } + + } # End of sub run_tests 1; diff --git a/t/re/pat_rt_report.t b/t/re/pat_rt_report.t index 2b6063c..dd740e7 100644 --- a/t/re/pat_rt_report.t +++ b/t/re/pat_rt_report.t @@ -20,7 +20,7 @@ use warnings; use 5.010; use Config; -plan tests => 2502; # Update this when adding/deleting tests. +plan tests => 2504; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1131,6 +1131,16 @@ EOP my $s = "\x{f2}\x{140}\x{fe}\x{ff}\x{ff}\x{ff}"; ok($s !~ /^0000.\34500\376\377\377\377/, "RT #129085"); } + { + # rt + fresh_perl_is( + 'no warnings "regexp"; "foo"=~/((?1)){8,0}/; print "ok"', + "ok", {}, 'RT #130561 - allowing impossible quantifier should not cause SEGVs'); + my $s= "foo"; + no warnings 'regexp'; + ok($s=~/(foo){1,0}|(?1)/, + "RT #130561 - allowing impossible quantifier should not break recursion"); + } } # End of sub run_tests diff --git a/t/re/re_tests b/t/re/re_tests index f210202..410fcea 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1464,13 +1464,10 @@ abc\N abc\n n # Verify get errors. For these, we need // or else puts it in single quotes, # and bypasses the lexer. /\N{U+}/ - c - Invalid hexadecimal number -# Below currently gives a misleading message -/[\N{U+}]/ - Sc - Unmatched -/[\N{U+}]/ - sc - Syntax error in (?[...]) +/[\N{U+}]/ - c - Invalid hexadecimal number /abc\N{def/ - c - Missing right brace /\N{U+4AG3}/ - c - Invalid hexadecimal number -/[\N{U+4AG3}]/ - Sc - Unmatched -/[\N{U+4AG3}]/ - sc - Syntax error in (?[...]) +/[\N{U+4AG3}]/ - c - Invalid hexadecimal number # And verify that in single quotes which bypasses the lexer, the regex compiler # figures it out. diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index 22711d5..597df92 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -532,10 +532,10 @@ my @warning = ( ], '/a{1,1}?\x{100}/' => 'Useless use of greediness modifier \'?\' {#} m/a{1,1}?{#}\x{100}/', "/(?[ [ % - % ] ])/" => "", - "/(?[ [ : - \\x$colon_hex ] ])\\x{100}/" => "\": - \\x$colon_hex \" is more clearly written simply as \":\". This will be a fatal error in Perl 5.28 {#} m/(?[ [ : - \\x$colon_hex {#}] ])\\x{100}/", - "/(?[ [ \\x$colon_hex - : ] ])\\x{100}/" => "\"\\x$colon_hex\ - : \" is more clearly written simply as \":\". This will be a fatal error in Perl 5.28 {#} m/(?[ [ \\x$colon_hex - : {#}] ])\\x{100}/", - "/(?[ [ \\t - \\x$tab_hex ] ])\\x{100}/" => "\"\\t - \\x$tab_hex \" is more clearly written simply as \"\\t\". This will be a fatal error in Perl 5.28 {#} m/(?[ [ \\t - \\x$tab_hex {#}] ])\\x{100}/", - "/(?[ [ \\x$tab_hex - \\t ] ])\\x{100}/" => "\"\\x$tab_hex\ - \\t \" is more clearly written simply as \"\\t\". This will be a fatal error in Perl 5.28 {#} m/(?[ [ \\x$tab_hex - \\t {#}] ])\\x{100}/", + "/(?[ [ : - \\x$colon_hex ] ])\\x{100}/" => "\": - \\x$colon_hex \" is more clearly written simply as \":\" {#} m/(?[ [ : - \\x$colon_hex {#}] ])\\x{100}/", + "/(?[ [ \\x$colon_hex - : ] ])\\x{100}/" => "\"\\x$colon_hex\ - : \" is more clearly written simply as \":\" {#} m/(?[ [ \\x$colon_hex - : {#}] ])\\x{100}/", + "/(?[ [ \\t - \\x$tab_hex ] ])\\x{100}/" => "\"\\t - \\x$tab_hex \" is more clearly written simply as \"\\t\" {#} m/(?[ [ \\t - \\x$tab_hex {#}] ])\\x{100}/", + "/(?[ [ \\x$tab_hex - \\t ] ])\\x{100}/" => "\"\\x$tab_hex\ - \\t \" is more clearly written simply as \"\\t\" {#} m/(?[ [ \\x$tab_hex - \\t {#}] ])\\x{100}/", "/(?[ [ $B_hex - C ] ])/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/(?[ [ $B_hex - C {#}] ])/", "/(?[ [ A - $B_hex ] ])/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/(?[ [ A - $B_hex {#}] ])/", "/(?[ [ $low_mixed_alpha - $high_mixed_alpha ] ])/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/(?[ [ $low_mixed_alpha - $high_mixed_alpha {#}] ])/", @@ -611,10 +611,10 @@ my @warning_only_under_strict = ( '/[\N{U+FF}-\x{100}]/' => 'Both or neither range ends should be Unicode {#} m/[\N{U+FF}-\x{100}{#}]/', '/[\N{U+100}-\x{101}]/' => "", "/[%-%]/" => "", - "/[:-\\x$colon_hex]\\x{100}/" => "\":-\\x$colon_hex\" is more clearly written simply as \":\". This will be a fatal error in Perl 5.28 {#} m/[:-\\x$colon_hex\{#}]\\x{100}/", - "/[\\x$colon_hex-:]\\x{100}/" => "\"\\x$colon_hex-:\" is more clearly written simply as \":\". This will be a fatal error in Perl 5.28 {#} m/[\\x$colon_hex\-:{#}]\\x{100}/", - "/[\\t-\\x$tab_hex]\\x{100}/" => "\"\\t-\\x$tab_hex\" is more clearly written simply as \"\\t\". This will be a fatal error in Perl 5.28 {#} m/[\\t-\\x$tab_hex\{#}]\\x{100}/", - "/[\\x$tab_hex-\\t]\\x{100}/" => "\"\\x$tab_hex-\\t\" is more clearly written simply as \"\\t\". This will be a fatal error in Perl 5.28 {#} m/[\\x$tab_hex\-\\t{#}]\\x{100}/", + "/[:-\\x$colon_hex]\\x{100}/" => "\":-\\x$colon_hex\" is more clearly written simply as \":\" {#} m/[:-\\x$colon_hex\{#}]\\x{100}/", + "/[\\x$colon_hex-:]\\x{100}/" => "\"\\x$colon_hex-:\" is more clearly written simply as \":\" {#} m/[\\x$colon_hex\-:{#}]\\x{100}/", + "/[\\t-\\x$tab_hex]\\x{100}/" => "\"\\t-\\x$tab_hex\" is more clearly written simply as \"\\t\" {#} m/[\\t-\\x$tab_hex\{#}]\\x{100}/", + "/[\\x$tab_hex-\\t]\\x{100}/" => "\"\\x$tab_hex-\\t\" is more clearly written simply as \"\\t\" {#} m/[\\x$tab_hex\-\\t{#}]\\x{100}/", "/[$B_hex-C]/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/[$B_hex-C{#}]/", "/[A-$B_hex]/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/[A-$B_hex\{#}]/", "/[$low_mixed_alpha-$high_mixed_alpha]/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/[$low_mixed_alpha-$high_mixed_alpha\{#}]/", diff --git a/t/re/regex_sets.t b/t/re/regex_sets.t index 994d0a2..6a79f9d 100644 --- a/t/re/regex_sets.t +++ b/t/re/regex_sets.t @@ -165,13 +165,7 @@ for my $char ("Ù ", "Ù¥", "Ù©") { like($@, qr/^Syntax error/, '/(?[(\c]) ]\b/ should be a syntax error'); eval { $_ = '(?[\c[]](])'; qr/$_/ }; like($@, qr/^Syntax error/, '/(?[\c[]](])/ should be a syntax error'); - { - # This block needs to go after 5.26, as it will be - # fatal in 5.28. But it's not fatal yet, so we ought - # to test it. - no warnings 'deprecated'; - like("\c#", qr/(?[\c#])/, '\c# should match itself'); - } + like("\c#", qr/(?[\c#])/, '\c# should match itself'); like("\c[", qr/(?[\c[])/, '\c[ should match itself'); like("\c\ ", qr/(?[\c\])/, '\c\ should match itself'); like("\c]", qr/(?[\c]])/, '\c] should match itself'); diff --git a/t/run/switchDx.t b/t/run/switchDx.t index 43f31bf..9ea0a32 100644 --- a/t/run/switchDx.t +++ b/t/run/switchDx.t @@ -11,7 +11,7 @@ use Config; my $perlio_log = "perlio$$.txt"; skip_all "DEBUGGING build required" - unless $::Config{ccflags} =~ /DEBUGGING/ + unless $::Config{ccflags} =~ /(? 8; diff --git a/t/uni/parser.t b/t/uni/parser.t index 624fdd0..2c68fb0 100644 --- a/t/uni/parser.t +++ b/t/uni/parser.t @@ -197,7 +197,7 @@ like( $@, qr/Bad name after Foo'/, 'Bad name after Foo\'' ); ? "\x{74}\x{41}" : "\x{c0}\x{a0}"; CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\""; - like( $@, qr/Malformed UTF-8 character immediately after '\\N\{abc' at .* within string/, 'Malformed UTF-8 input to \N{}'); + like( $@, qr/Malformed UTF-8 character \(fatal\) at /, 'Malformed UTF-8 input to \N{}'); } # RT# 124216: Perl_sv_clear: Assertion diff --git a/taint.c b/taint.c index f1f6b7b..1b78928 100644 --- a/taint.c +++ b/taint.c @@ -78,7 +78,6 @@ void Perl_taint_env(pTHX) { SV** svp; - MAGIC* mg; const char* const *e; static const char* const misc_env[] = { "IFS", /* most shells' inter-field separators */ @@ -121,6 +120,7 @@ Perl_taint_env(pTHX) STRLEN len = 8; /* strlen(name) */ while (1) { + MAGIC* mg; if (i) len = my_sprintf(name,"DCL$PATH;%d", i); svp = hv_fetch(GvHVn(PL_envgv), name, len, FALSE); @@ -141,6 +141,7 @@ Perl_taint_env(pTHX) svp = hv_fetchs(GvHVn(PL_envgv),"PATH",FALSE); if (svp && *svp) { + MAGIC* mg; if (SvTAINTED(*svp)) { TAINT; taint_proper("Insecure %s%s", "$ENV{PATH}"); diff --git a/toke.c b/toke.c index 3f13f76..5a711d3 100644 --- a/toke.c +++ b/toke.c @@ -669,7 +669,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) Creates and initialises a new lexer/parser state object, supplying a context in which to lex and parse from a new source of Perl code. A pointer to the new state object is placed in L. An entry -is made on the save stack so that upon unwinding the new state object +is made on the save stack so that upon unwinding, the new state object will be destroyed and the former value of L will be restored. Nothing else need be done to clean up the parsing context. @@ -701,6 +701,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) { const char *s = NULL; yy_parser *parser, *oparser; + if (flags && flags & ~LEX_START_FLAGS) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); @@ -726,6 +727,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->lex_state = LEX_NORMAL; parser->expect = XSTATE; parser->rsfp = rsfp; + parser->recheck_utf8_validity = FALSE; parser->rsfp_filters = !(flags & LEX_START_SAME_FILTER) || !oparser ? NULL @@ -742,7 +744,22 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) if (line) { STRLEN len; + const U8* first_bad_char_loc; + s = SvPV_const(line, len); + + if ( SvUTF8(line) + && UNLIKELY(! is_utf8_string_loc((U8 *) s, + SvCUR(line), + &first_bad_char_loc))) + { + _force_out_malformed_utf8_message(first_bad_char_loc, + (U8 *) s + SvCUR(line), + 0, + 1 /* 1 means die */ ); + NOT_REACHED; /* NOTREACHED */ + } + parser->linestr = flags & LEX_START_COPIED ? SvREFCNT_inc_simple_NN(line) : newSVpvn_flags(s, len, SvUTF8(line)); @@ -751,6 +768,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) } else { parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); } + parser->oldoldbufptr = parser->oldbufptr = parser->bufptr = @@ -1039,12 +1057,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { p++; highhalf++; - } else if (! UTF8_IS_INVARIANT(c)) { - _force_out_malformed_utf8_message((U8 *) p, (U8 *) e, - 0, - 1 /* 1 means die */ ); - NOT_REACHED; /* NOTREACHED */ - } + } else assert(UTF8_IS_INVARIANT(c)); } if (!highhalf) goto plain_copy; @@ -1258,6 +1271,24 @@ Perl_lex_discard_to(pTHX_ char *ptr) PL_parser->last_lop -= discard_len; } +void +Perl_notify_parser_that_changed_to_utf8(pTHX) +{ + /* Called when $^H is changed to indicate that HINT_UTF8 has changed from + * off to on. At compile time, this has the effect of entering a 'use + * utf8' section. This means that any input was not previously checked for + * UTF-8 (because it was off), but now we do need to check it, or our + * assumptions about the input being sane could be wrong, and we could + * segfault. This routine just sets a flag so that the next time we look + * at the input we do the well-formed UTF-8 check. If we aren't in the + * proper phase, there may not be a parser object, but if there is, setting + * the flag is harmless */ + + if (PL_parser) { + PL_parser->recheck_utf8_validity = TRUE; + } +} + /* =for apidoc Amx|bool|lex_next_chunk|U32 flags @@ -1293,7 +1324,6 @@ Perl_lex_next_chunk(pTHX_ U32 flags) STRLEN linestart_pos, last_uni_pos, last_lop_pos; bool got_some_for_debugger = 0; bool got_some; - const U8* first_bad_char_loc; if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); @@ -1360,15 +1390,19 @@ Perl_lex_next_chunk(pTHX_ U32 flags) PL_parser->bufend = buf + new_bufend_pos; PL_parser->bufptr = buf + bufptr_pos; - if (UTF && ! is_utf8_string_loc((U8 *) PL_parser->bufptr, - PL_parser->bufend - PL_parser->bufptr, - &first_bad_char_loc)) - { - _force_out_malformed_utf8_message(first_bad_char_loc, - (U8 *) PL_parser->bufend, - 0, - 1 /* 1 means die */ ); - NOT_REACHED; /* NOTREACHED */ + if (UTF) { + const U8* first_bad_char_loc; + if (UNLIKELY(! is_utf8_string_loc( + (U8 *) PL_parser->bufptr, + PL_parser->bufend - PL_parser->bufptr, + &first_bad_char_loc))) + { + _force_out_malformed_utf8_message(first_bad_char_loc, + (U8 *) PL_parser->bufend, + 0, + 1 /* 1 means die */ ); + NOT_REACHED; /* NOTREACHED */ + } } PL_parser->oldbufptr = buf + oldbufptr_pos; @@ -2143,7 +2177,7 @@ Perl_str_to_version(pTHX_ SV *sv) STRLEN len; const char *start = SvPV_const(sv,len); const char * const end = start + len; - const bool utf = SvUTF8(sv) ? TRUE : FALSE; + const bool utf = cBOOL(SvUTF8(sv)); PERL_ARGS_ASSERT_STR_TO_VERSION; @@ -2249,10 +2283,9 @@ S_force_strict_version(pTHX_ char *s) /* * S_tokeq - * Tokenize a quoted string passed in as an SV. It finds the next - * chunk, up to end of string or a backslash. It may make a new - * SV containing that chunk (if HINT_NEW_STRING is on). It also - * turns \\ into \. + * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv', + * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is + * unchanged, and a new SV containing the modified input is returned. */ STATIC SV * @@ -2557,7 +2590,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) SV *cv; SV *rv; HV *stash; - const U8* first_bad_char_loc; const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; @@ -2567,21 +2599,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) return res; } - if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, - e - backslash_ptr, - &first_bad_char_loc)) - { - _force_out_malformed_utf8_message(first_bad_char_loc, - (U8 *) PL_parser->bufend, - 0, - 0 /* 0 means don't die */ ); - yyerror_pv(Perl_form(aTHX_ - "Malformed UTF-8 character immediately after '%.*s'", - (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr), - SVf_UTF8); - return NULL; - } - res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, /* include the <}> */ e - backslash_ptr + 1); @@ -2706,7 +2723,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) const U8* first_bad_char_loc; STRLEN len; const char* const str = SvPV_const(res, len); - if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { + if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len, + &first_bad_char_loc))) + { _force_out_malformed_utf8_message(first_bad_char_loc, (U8 *) PL_parser->bufend, 0, @@ -2852,8 +2871,6 @@ S_scan_const(pTHX_ char *start) bool didrange = FALSE; /* did we just finish a range? */ bool in_charclass = FALSE; /* within /[...]/ */ bool has_utf8 = FALSE; /* Output constant is UTF8 */ - bool has_above_latin1 = FALSE; /* does something require special - handling in tr/// ? */ bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be UTF8? But, this can show as true when the source isn't utf8, as for @@ -2868,6 +2885,14 @@ S_scan_const(pTHX_ char *start) STRLEN offset_to_max; /* The offset in the output to where the range high-end character is temporarily placed */ + /* Does something require special handling in tr/// ? This avoids extra + * work in a less likely case. As such, khw didn't feel it was worth + * adding any branches to the more mainline code to handle this, which + * means that this doesn't get set in some circumstances when things like + * \x{100} get expanded out. As a result there needs to be extra testing + * done in the tr code */ + bool has_above_latin1 = FALSE; + /* Note on sizing: The scanned constant is placed into sv, which is * initialized by newSV() assuming one byte of output for every byte of * input. This routine expects newSV() to allocate an extra byte for a @@ -2948,7 +2973,7 @@ S_scan_const(pTHX_ char *start) /* The tests here for being above Latin1 and similar ones * in the following 'else' suffice to find all such * occurences in the constant, except those added by a - * backslash escape sequence, like \x{100}. And all those + * backslash escape sequence, like \x{100}. Mostly, those * set 'has_above_latin1' as appropriate */ if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { has_above_latin1 = TRUE; @@ -3004,7 +3029,6 @@ S_scan_const(pTHX_ char *start) bool convert_unicode; IV real_range_max = 0; #endif - /* Get the code point values of the range ends. */ if (has_utf8) { /* We know the utf8 is valid, because we just constructed @@ -3012,6 +3036,13 @@ S_scan_const(pTHX_ char *start) min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1); range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL); range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL); + + /* This compensates for not all code setting + * 'has_above_latin1', so that we don't skip stuff that + * should be executed */ + if (range_max > 255) { + has_above_latin1 = TRUE; + } } else { min_ptr = max_ptr - 1; @@ -3023,7 +3054,11 @@ S_scan_const(pTHX_ char *start) * that code point is already in the output, twice. We can * just back up over the second instance and avoid all the rest * of the work. But if it is a variant character, it's been - * counted twice, so decrement */ + * counted twice, so decrement. (This unlikely scenario is + * special cased, like the one for a range of 2 code points + * below, only because the main-line code below needs a range + * of 3 or more to work without special casing. Might as well + * get it out of the way now.) */ if (UNLIKELY(range_max == range_min)) { d = max_ptr; if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) { @@ -3240,18 +3275,18 @@ S_scan_const(pTHX_ char *start) #endif /* Always gets run for ASCII, and sometimes for EBCDIC. */ { - SSize_t i; - /* Here, no conversions are necessary, which means that the * first character in the range is already in 'd' and * valid, so we can skip overwriting it */ if (has_utf8) { + SSize_t i; d += UTF8SKIP(d); for (i = range_min + 1; i <= range_max; i++) { append_utf8_from_native_byte((U8) i, (U8 **) &d); } } else { + SSize_t i; d++; assert(range_min + 1 <= range_max); for (i = range_min + 1; i < range_max; i++) { @@ -3606,6 +3641,7 @@ S_scan_const(pTHX_ char *start) s++; if (*s != '{') { yyerror("Missing braces on \\N{}"); + *d++ = '\0'; continue; } s++; @@ -3617,7 +3653,7 @@ S_scan_const(pTHX_ char *start) } else { yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); } - continue; + yyquit(); /* Have exhausted the input. */ } /* Here it looks like a named character */ @@ -3636,6 +3672,7 @@ S_scan_const(pTHX_ char *start) "Invalid hexadecimal number in \\N{U+...}" ); s = e + 1; + *d++ = '\0'; continue; } while (++s < e) { @@ -3834,6 +3871,7 @@ S_scan_const(pTHX_ char *start) " in transliteration operator", /* +1 to include the "}" */ (int) (e + 1 - start), start)); + *d++ = '\0'; goto end_backslash_N; } @@ -3899,15 +3937,16 @@ S_scan_const(pTHX_ char *start) case 'c': s++; if (s < send) { - *d++ = grok_bslash_c(*s++, 1); + *d++ = grok_bslash_c(*s, 1); } else { yyerror("Missing control char name in \\c"); + yyquit(); /* Are at end of input, no sense continuing */ } #ifdef EBCDIC non_portable_endpoint++; #endif - continue; + break; /* printf-style backslashes, formfeeds, newlines, etc */ case 'b': @@ -4152,10 +4191,7 @@ S_intuit_more(pTHX_ char *s) weight -= seen[un_char] * 10; if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { int len; - char *tmp = PL_bufend; - PL_bufend = (char*)send; - scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); - PL_bufend = tmp; + scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); len = (int)strlen(tmpbuf); if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0, SVt_PV)) @@ -4276,11 +4312,14 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) } if (*start == '$') { + SSize_t start_off = start - SvPVX(PL_linestr); if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || isUPPER(*PL_tokenbuf)) return 0; - s = skipspace(s); - PL_bufptr = start; + /* this could be $# */ + if (isSPACE(*s)) + s = skipspace(s); + PL_bufptr = SvPVX(PL_linestr) + start_off; PL_expect = XREF; return *s == '(' ? FUNCMETH : METHOD; } @@ -4725,6 +4764,20 @@ Perl_yylex(pTHX) GV *gv = NULL; GV **gvp = NULL; + if (UNLIKELY(PL_parser->recheck_utf8_validity)) { + const U8* first_bad_char_loc; + if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr, + PL_bufend - PL_bufptr, + &first_bad_char_loc))) + { + _force_out_malformed_utf8_message(first_bad_char_loc, + (U8 *) PL_bufend, + 0, + 1 /* 1 means die */ ); + NOT_REACHED; /* NOTREACHED */ + } + PL_parser->recheck_utf8_validity = FALSE; + } DEBUG_T( { SV* tmp = newSVpvs(""); PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n", @@ -4972,7 +5025,16 @@ Perl_yylex(pTHX) s = PL_bufend; } else { + int save_error_count = PL_error_count; + s = scan_const(PL_bufptr); + + /* Set flag if this was a pattern and there were errors. op.c will + * refuse to compile a pattern with this flag set. Otherwise, we + * could get segfaults, etc. */ + if (PL_lex_inpat && PL_error_count > save_error_count) { + ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR; + } if (*s == '\\') PL_lex_state = LEX_INTERPCASEMOD; else @@ -5023,7 +5085,7 @@ Perl_yylex(pTHX) * as a var; e.g. ($, ...) would be seen as the var '$,' */ - char sigil; + U8 sigil; s = skipspace(s); sigil = *s++; @@ -5081,12 +5143,6 @@ Perl_yylex(pTHX) switch (*s) { default: if (UTF) { - if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) { - _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend, - 0, - 1 /* 1 means die */ ); - NOT_REACHED; /* NOTREACHED */ - } if (isIDFIRST_utf8_safe(s, PL_bufend)) { goto keylookup; } @@ -5217,7 +5273,7 @@ Perl_yylex(pTHX) } do { fake_eof = 0; - bof = PL_rsfp ? TRUE : FALSE; + bof = cBOOL(PL_rsfp); if (0) { fake_eof: fake_eof = LEX_FAKE_EOF; @@ -5719,8 +5775,7 @@ Perl_yylex(pTHX) } else if (PL_expect == XPOSTDEREF) POSTDEREF('%'); PL_tokenbuf[0] = '%'; - s = scan_ident(s, PL_tokenbuf + 1, - sizeof PL_tokenbuf - 1, FALSE); + s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); pl_yylval.ival = 0; if (!PL_tokenbuf[1]) { PREREF('%'); @@ -6266,8 +6321,7 @@ Perl_yylex(pTHX) } PL_tokenbuf[0] = '&'; - s = scan_ident(s - 1, PL_tokenbuf + 1, - sizeof PL_tokenbuf - 1, TRUE); + s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); pl_yylval.ival = (OPpENTERSUB_AMPER<<8); if (PL_tokenbuf[1]) { force_ident_maybe_lex('&'); @@ -6547,8 +6601,7 @@ Perl_yylex(pTHX) } PL_tokenbuf[0] = '$'; - s = scan_ident(s, PL_tokenbuf + 1, - sizeof PL_tokenbuf - 1, FALSE); + s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); if (PL_expect == XOPERATOR) { d = s; if (PL_bufptr > s) { @@ -7260,17 +7313,24 @@ Perl_yylex(pTHX) == OA_FILEREF)) { bool immediate_paren = *s == '('; + SSize_t s_off; /* (Now we can afford to cross potential line boundary.) */ s = skipspace(s); + /* intuit_method() can indirectly call lex_next_chunk(), + * invalidating s + */ + s_off = s - SvPVX(PL_linestr); /* Two barewords in a row may indicate method call. */ if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') && (tmp = intuit_method(s, lex ? NULL : sv, cv))) { + /* the code at method: doesn't use s */ goto method; } + s = SvPVX(PL_linestr) + s_off; /* If not a declared subroutine, it's an indirect object. */ /* (But it's an indir obj regardless for sort.) */ @@ -8980,7 +9040,6 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) s++; if (*s == ',') { GV* gv; - PADOFFSET off; if (keyword(w, s - w, 0)) return; @@ -8988,6 +9047,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) if (gv && GvCVu(gv)) return; if (s - w <= 254) { + PADOFFSET off; char tmpbuf[256]; Copy(w, tmpbuf+1, s - w, char); *tmpbuf = '&'; @@ -10412,7 +10472,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re const char * non_grapheme_msg = "Use of unassigned code point or" " non-standalone grapheme for a delimiter" " will be a fatal error starting in Perl" - " v5.30"; + " 5.30"; /* The only non-UTF character that isn't a stand alone grapheme is * white-space, hence can't be a delimiter. So can skip for non-UTF-8 */ bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED); @@ -10511,10 +10571,15 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re } /* terminate when run out of buffer (the for() condition), or have found the terminator */ - else if (*s == term) { - if (termlen == 1) + else if (*s == term) { /* First byte of terminator matches */ + if (termlen == 1) /* If is the only byte, are done */ break; - if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) + + /* If the remainder of the terminator matches, also are + * done, after checking that is a separate grapheme */ + if ( s + termlen <= PL_bufend + && memEQ(s + 1, (char*)termstr + 1, termlen - 1)) + { if ( check_grapheme && UNLIKELY(! _is_grapheme((U8 *) start, (U8 *) s, @@ -10525,9 +10590,12 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re "%s", non_grapheme_msg); } break; + } } - else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) + else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) { has_utf8 = TRUE; + } + *to = *s; } } @@ -11267,8 +11335,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) STATIC char * S_scan_formline(pTHX_ char *s) { - char *eol; - char *t; SV * const stuff = newSVpvs(""); bool needargs = FALSE; bool eofmt = FALSE; @@ -11276,8 +11342,9 @@ S_scan_formline(pTHX_ char *s) PERL_ARGS_ASSERT_SCAN_FORMLINE; while (!needargs) { + char *eol; if (*s == '.') { - t = s+1; + char *t = s+1; #ifdef PERL_STRICT_CR while (SPACE_OR_TAB(*t)) t++; @@ -11294,6 +11361,7 @@ S_scan_formline(pTHX_ char *s) if (!eol++) eol = PL_bufend; if (*s != '#') { + char *t; for (t = s; t < eol; t++) { if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { needargs = FALSE; @@ -11395,6 +11463,29 @@ S_yywarn(pTHX_ const char *const s, U32 flags) return 0; } +void +Perl_abort_execution(pTHX_ const char * const msg, const char * const name) +{ + PERL_ARGS_ASSERT_ABORT_EXECUTION; + + if (PL_minus_c) + Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name); + else { + Perl_croak(aTHX_ + "%sExecution of %s aborted due to compilation errors.\n", msg, name); + } + NOT_REACHED; /* NOTREACHED */ +} + +void +Perl_yyquit(pTHX) +{ + /* Called, after at least one error has been found, to abort the parse now, + * instead of trying to forge ahead */ + + yyerror_pvn(NULL, 0, 0); +} + int Perl_yyerror(pTHX_ const char *const s) { @@ -11418,100 +11509,120 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) SV * const where_sv = newSVpvs_flags("", SVs_TEMP); int yychar = PL_parser->yychar; - PERL_ARGS_ASSERT_YYERROR_PVN; - - if (!yychar || (yychar == ';' && !PL_rsfp)) - sv_catpvs(where_sv, "at EOF"); - else if ( PL_oldoldbufptr - && PL_bufptr > PL_oldoldbufptr - && PL_bufptr - PL_oldoldbufptr < 200 - && PL_oldoldbufptr != PL_oldbufptr - && PL_oldbufptr != PL_bufptr) - { - /* - Only for NetWare: - The code below is removed for NetWare because it abends/crashes on NetWare - when the script has error such as not having the closing quotes like: - if ($var eq "value) - Checking of white spaces is anyway done in NetWare code. - */ + /* Output error message 's' with length 'len'. 'flags' are SV flags that + * apply. If the number of errors found is large enough, it abandons + * parsing. If 's' is NULL, there is no message, and it abandons + * processing unconditionally */ + + if (s != NULL) { + if (!yychar || (yychar == ';' && !PL_rsfp)) + sv_catpvs(where_sv, "at EOF"); + else if ( PL_oldoldbufptr + && PL_bufptr > PL_oldoldbufptr + && PL_bufptr - PL_oldoldbufptr < 200 + && PL_oldoldbufptr != PL_oldbufptr + && PL_oldbufptr != PL_bufptr) + { + /* + Only for NetWare: + The code below is removed for NetWare because it + abends/crashes on NetWare when the script has error such as + not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ #ifndef NETWARE - while (isSPACE(*PL_oldoldbufptr)) - PL_oldoldbufptr++; + while (isSPACE(*PL_oldoldbufptr)) + PL_oldoldbufptr++; #endif - context = PL_oldoldbufptr; - contlen = PL_bufptr - PL_oldoldbufptr; - } - else if ( PL_oldbufptr - && PL_bufptr > PL_oldbufptr - && PL_bufptr - PL_oldbufptr < 200 - && PL_oldbufptr != PL_bufptr) { - /* - Only for NetWare: - The code below is removed for NetWare because it abends/crashes on NetWare - when the script has error such as not having the closing quotes like: - if ($var eq "value) - Checking of white spaces is anyway done in NetWare code. - */ + context = PL_oldoldbufptr; + contlen = PL_bufptr - PL_oldoldbufptr; + } + else if ( PL_oldbufptr + && PL_bufptr > PL_oldbufptr + && PL_bufptr - PL_oldbufptr < 200 + && PL_oldbufptr != PL_bufptr) { + /* + Only for NetWare: + The code below is removed for NetWare because it + abends/crashes on NetWare when the script has error such as + not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ #ifndef NETWARE - while (isSPACE(*PL_oldbufptr)) - PL_oldbufptr++; + while (isSPACE(*PL_oldbufptr)) + PL_oldbufptr++; #endif - context = PL_oldbufptr; - contlen = PL_bufptr - PL_oldbufptr; - } - else if (yychar > 255) - sv_catpvs(where_sv, "next token ???"); - else if (yychar == YYEMPTY) { - if (PL_lex_state == LEX_NORMAL) - sv_catpvs(where_sv, "at end of line"); - else if (PL_lex_inpat) - sv_catpvs(where_sv, "within pattern"); - else - sv_catpvs(where_sv, "within string"); - } - else { - sv_catpvs(where_sv, "next char "); - if (yychar < 32) - Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); - else if (isPRINT_LC(yychar)) { - const char string = yychar; - sv_catpvn(where_sv, &string, 1); - } - else - Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); - } - msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); - Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ", - OutCopFILE(PL_curcop), - (IV)(PL_parser->preambling == NOLINE - ? CopLINE(PL_curcop) - : PL_parser->preambling)); - if (context) - Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", - UTF8fARG(UTF, contlen, context)); - else - Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); - if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { - Perl_sv_catpvf(aTHX_ msg, - " (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n", - (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); - PL_multi_end = 0; - } - if (PL_in_eval & EVAL_WARNONLY) { - PL_in_eval &= ~EVAL_WARNONLY; - Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); + context = PL_oldbufptr; + contlen = PL_bufptr - PL_oldbufptr; + } + else if (yychar > 255) + sv_catpvs(where_sv, "next token ???"); + else if (yychar == YYEMPTY) { + if (PL_lex_state == LEX_NORMAL) + sv_catpvs(where_sv, "at end of line"); + else if (PL_lex_inpat) + sv_catpvs(where_sv, "within pattern"); + else + sv_catpvs(where_sv, "within string"); + } + else { + sv_catpvs(where_sv, "next char "); + if (yychar < 32) + Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); + else if (isPRINT_LC(yychar)) { + const char string = yychar; + sv_catpvn(where_sv, &string, 1); + } + else + Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); + } + msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); + Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ", + OutCopFILE(PL_curcop), + (IV)(PL_parser->preambling == NOLINE + ? CopLINE(PL_curcop) + : PL_parser->preambling)); + if (context) + Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", + UTF8fARG(UTF, contlen, context)); + else + Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); + if ( PL_multi_start < PL_multi_end + && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) + { + Perl_sv_catpvf(aTHX_ msg, + " (Might be a runaway multi-line %c%c string starting on" + " line %" IVdf ")\n", + (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); + PL_multi_end = 0; + } + if (PL_in_eval & EVAL_WARNONLY) { + PL_in_eval &= ~EVAL_WARNONLY; + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); + } + else { + qerror(msg); + } } - else - qerror(msg); - if (PL_error_count >= 10) { - SV * errsv; - if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv))) - Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n", - SVfARG(errsv), OutCopFILE(PL_curcop)); - else - Perl_croak(aTHX_ "%s has too many errors.\n", - OutCopFILE(PL_curcop)); + if (s == NULL || PL_error_count >= 10) { + const char * msg = ""; + const char * const name = OutCopFILE(PL_curcop); + + if (PL_in_eval) { + SV * errsv = ERRSV; + if (SvCUR(errsv)) { + msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv)); + } + } + + if (s == NULL) { + abort_execution(msg, name); + } + else { + Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name); + } } PL_in_my = 0; PL_in_my_stash = NULL; diff --git a/utf8.c b/utf8.c index 77e16f3..89c8413 100644 --- a/utf8.c +++ b/utf8.c @@ -754,11 +754,15 @@ Perl__is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags) return UTF8SKIP(s); } -STATIC char * -S__byte_dump_string(pTHX_ const U8 * s, const STRLEN len) +char * +Perl__byte_dump_string(pTHX_ const U8 * s, const STRLEN len, const bool format) { /* Returns a mortalized C string that is a displayable copy of the 'len' - * bytes starting at 's', each in a \xXY format. */ + * bytes starting at 's'. 'format' gives how to display each byte. + * Currently, there are only two formats, so it is currently a bool: + * 0 \xab + * 1 ab (that is a space between two hex digit bytes) + */ const STRLEN output_len = 4 * len + 1; /* 4 bytes per each input, plus a trailing NUL */ @@ -776,8 +780,13 @@ S__byte_dump_string(pTHX_ const U8 * s, const STRLEN len) const unsigned high_nibble = (*s & 0xF0) >> 4; const unsigned low_nibble = (*s & 0x0F); - *d++ = '\\'; - *d++ = 'x'; + if (format) { + *d++ = ' '; + } + else { + *d++ = '\\'; + *d++ = 'x'; + } if (high_nibble < 10) { *d++ = high_nibble + '0'; @@ -827,7 +836,7 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s, return Perl_form(aTHX_ "%s: %s (unexpected non-continuation byte 0x%02x," " %s after start byte 0x%02x; need %d bytes, got %d)", malformed_text, - _byte_dump_string(s, print_len), + _byte_dump_string(s, print_len, 0), *(s + non_cont_byte_pos), where, *s, @@ -1179,14 +1188,6 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, /* Save how many bytes were actually in the character */ curlen = s - s0; - /* A convenience macro that matches either of the too-short conditions. */ -# define UTF8_GOT_TOO_SHORT (UTF8_GOT_SHORT|UTF8_GOT_NON_CONTINUATION) - - if (UNLIKELY(possible_problems & UTF8_GOT_TOO_SHORT)) { - uv_so_far = uv; - uv = UNICODE_REPLACEMENT; - } - /* Note that there are two types of too-short malformation. One is when * there is actual wrong data before the normal termination of the * sequence. The other is that the sequence wasn't complete before the end @@ -1194,7 +1195,15 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, * This means that we were passed data for a partial character, but it is * valid as far as we saw. The other is definitely invalid. This * distinction could be important to a caller, so the two types are kept - * separate. */ + * separate. + * + * A convenience macro that matches either of the too-short conditions. */ +# define UTF8_GOT_TOO_SHORT (UTF8_GOT_SHORT|UTF8_GOT_NON_CONTINUATION) + + if (UNLIKELY(possible_problems & UTF8_GOT_TOO_SHORT)) { + uv_so_far = uv; + uv = UNICODE_REPLACEMENT; + } /* Check for overflow */ if (UNLIKELY(does_utf8_overflow(s0, send))) { @@ -1401,7 +1410,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, if (pack_warn) { message = Perl_form(aTHX_ "%s: %s (overflows)", malformed_text, - _byte_dump_string(s0, send - s0)); + _byte_dump_string(s0, send - s0, 0)); } } } @@ -1437,7 +1446,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, "%s: %s (unexpected continuation byte 0x%02x," " with no preceding start byte)", malformed_text, - _byte_dump_string(s0, 1), *s0); + _byte_dump_string(s0, 1, 0), *s0); } } } @@ -1452,7 +1461,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, message = Perl_form(aTHX_ "%s: %s (too short; %d byte%s available, need %d)", malformed_text, - _byte_dump_string(s0, send - s0), + _byte_dump_string(s0, send - s0, 0), (int)avail_len, avail_len == 1 ? "" : "s", (int)expectlen); @@ -1516,8 +1525,8 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, " should be represented with a" " different, shorter sequence)", malformed_text, - _byte_dump_string(s0, send - s0), - _byte_dump_string(s0, curlen)); + _byte_dump_string(s0, send - s0, 0), + _byte_dump_string(s0, curlen, 0)); } else { U8 tmpbuf[UTF8_MAXBYTES+1]; @@ -1527,8 +1536,8 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, "%s: %s (overlong; instead use %s to represent" " U+%0*" UVXf ")", malformed_text, - _byte_dump_string(s0, send - s0), - _byte_dump_string(tmpbuf, e - tmpbuf), + _byte_dump_string(s0, send - s0, 0), + _byte_dump_string(tmpbuf, e - tmpbuf, 0), ((uv < 256) ? 2 : 4), /* Field width of 2 for small code points */ uv); @@ -1553,7 +1562,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, message = Perl_form(aTHX_ "UTF-16 surrogate (any UTF-8 sequence that" " starts with \"%s\" is for a surrogate)", - _byte_dump_string(s0, curlen)); + _byte_dump_string(s0, curlen, 0)); } else { message = Perl_form(aTHX_ @@ -1583,7 +1592,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, "Any UTF-8 sequence that starts with" " \"%s\" is for a non-Unicode code point," " may not be portable", - _byte_dump_string(s0, curlen)); + _byte_dump_string(s0, curlen, 0)); } else { message = Perl_form(aTHX_ @@ -1622,7 +1631,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, "Any UTF-8 sequence that starts with" " \"%s\" is for a non-Unicode code" " point, and is not portable", - _byte_dump_string(s0, curlen)); + _byte_dump_string(s0, curlen, 0)); } else { message = Perl_form(aTHX_ @@ -1748,6 +1757,8 @@ Also implemented as a macro in utf8.h UV Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen) { + PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF; + assert(s < send); return utf8n_to_uvchr(s, send - s, retlen, @@ -4541,12 +4552,12 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash) while ((from_list = (AV *) hv_iternextsv(specials_inverse, &char_to, &to_len))) { - if (av_tindex_nomg(from_list) > 0) { + if (av_tindex_skip_len_mg(from_list) > 0) { SSize_t i; /* We iterate over all combinations of i,j to place each code * point on each list */ - for (i = 0; i <= av_tindex_nomg(from_list); i++) { + for (i = 0; i <= av_tindex_skip_len_mg(from_list); i++) { SSize_t j; AV* i_list = newAV(); SV** entryp = av_fetch(from_list, i, FALSE); @@ -4563,7 +4574,7 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash) } /* For DEBUG_U: UV u = valid_utf8_to_uvchr((U8*) SvPVX(*entryp), 0);*/ - for (j = 0; j <= av_tindex_nomg(from_list); j++) { + for (j = 0; j <= av_tindex_skip_len_mg(from_list); j++) { entryp = av_fetch(from_list, j, FALSE); if (entryp == NULL) { Perl_croak(aTHX_ "panic: av_fetch() unexpectedly failed"); @@ -4639,7 +4650,7 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash) /* Look through list to see if this inverse mapping already is * listed, or if there is a mapping to itself already */ - for (i = 0; i <= av_tindex_nomg(list); i++) { + for (i = 0; i <= av_tindex_skip_len_mg(list); i++) { SV** entryp = av_fetch(list, i, FALSE); SV* entry; UV uv; diff --git a/utf8.h b/utf8.h index 0fbe4b7..affa2d6 100644 --- a/utf8.h +++ b/utf8.h @@ -707,7 +707,7 @@ with a ptr argument. /* A Unicode character can fold to up to 3 characters */ #define UTF8_MAX_FOLD_CHAR_EXPAND 3 -#define IN_BYTES (CopHINTS_get(PL_curcop) & HINT_BYTES) +#define IN_BYTES UNLIKELY(CopHINTS_get(PL_curcop) & HINT_BYTES) /* @@ -726,12 +726,12 @@ case any call to string overloading updates the internal UTF-8 encoding flag. * Is so within 'feature unicode_strings' or 'locale :not_characters', and not * within 'use bytes'. UTF-8 locales are not tested for here, but perhaps * could be */ -#define IN_UNI_8_BIT \ - (((CopHINTS_get(PL_curcop) & (HINT_UNI_8_BIT)) \ - || (CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL \ - /* -1 below is for :not_characters */ \ - && _is_in_locale_category(FALSE, -1))) \ - && ! IN_BYTES) +#define IN_UNI_8_BIT \ + (( ( (CopHINTS_get(PL_curcop) & HINT_UNI_8_BIT)) \ + || ( CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL \ + /* -1 below is for :not_characters */ \ + && _is_in_locale_category(FALSE, -1))) \ + && (! IN_BYTES)) #define UTF8_ALLOW_EMPTY 0x0001 /* Allow a zero length string */ @@ -802,10 +802,10 @@ case any call to string overloading updates the internal UTF-8 encoding flag. #define UTF8_WARN_ILLEGAL_INTERCHANGE \ (UTF8_WARN_ILLEGAL_C9_INTERCHANGE|UTF8_WARN_NONCHAR) -/* This is used typically for code that is willing to accept inputs of - * illformed UTF-8 sequences, for whatever reason. However, all such sequences - * evaluate to the REPLACEMENT CHARACTER unless other flags overriding this are - * also present. */ +/* This is typically used for code that processes UTF-8 input and doesn't want + * to have to deal with any malformations that might be present. All such will + * be safely replaced by the REPLACEMENT CHARACTER, unless other flags + * overriding this are also present. */ #define UTF8_ALLOW_ANY ( UTF8_ALLOW_CONTINUATION \ |UTF8_ALLOW_NON_CONTINUATION \ |UTF8_ALLOW_SHORT \ diff --git a/util.c b/util.c index a542f5e..bd568bc 100644 --- a/util.c +++ b/util.c @@ -619,11 +619,11 @@ Perl_ninstr(const char *big, const char *bigend, const char *little, const char return (char*)big; { const char first = *little; - const char *s, *x; bigend -= lend - little++; OUTER: while (big <= bigend) { if (*big++ == first) { + const char *s, *x; for (x=big,s=little; s < lend; x++,s++) { if (*s != *x) goto OUTER; @@ -951,16 +951,7 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U char * const b = ninstr((char*)big,(char*)bigend, (char*)little, (char*)little + littlelen); - if (!b && tail) { /* Automatically multiline! */ - /* Chop \n from littlestr: */ - s = bigend - littlelen + 1; - if (*s == *little - && memEQ((char*)s + 1, (char*)little + 1, littlelen - 2)) - { - return (char*)s; - } - return NULL; - } + assert(!tail); /* valid => FBM; tail only set on SvVALID SVs */ return b; } @@ -1031,89 +1022,6 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U } } - -/* -=for apidoc foldEQ - -Returns true if the leading C bytes of the strings C and C are the -same -case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes -match themselves and their opposite case counterparts. Non-cased and non-ASCII -range bytes match only themselves. - -=cut -*/ - - -I32 -Perl_foldEQ(const char *s1, const char *s2, I32 len) -{ - const U8 *a = (const U8 *)s1; - const U8 *b = (const U8 *)s2; - - PERL_ARGS_ASSERT_FOLDEQ; - - assert(len >= 0); - - while (len--) { - if (*a != *b && *a != PL_fold[*b]) - return 0; - a++,b++; - } - return 1; -} -I32 -Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len) -{ - /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on - * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor - * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor - * does it check that the strings each have at least 'len' characters */ - - const U8 *a = (const U8 *)s1; - const U8 *b = (const U8 *)s2; - - PERL_ARGS_ASSERT_FOLDEQ_LATIN1; - - assert(len >= 0); - - while (len--) { - if (*a != *b && *a != PL_fold_latin1[*b]) { - return 0; - } - a++, b++; - } - return 1; -} - -/* -=for apidoc foldEQ_locale - -Returns true if the leading C bytes of the strings C and C are the -same case-insensitively in the current locale; false otherwise. - -=cut -*/ - -I32 -Perl_foldEQ_locale(const char *s1, const char *s2, I32 len) -{ - dVAR; - const U8 *a = (const U8 *)s1; - const U8 *b = (const U8 *)s2; - - PERL_ARGS_ASSERT_FOLDEQ_LOCALE; - - assert(len >= 0); - - while (len--) { - if (*a != *b && *a != PL_fold_locale[*b]) - return 0; - a++,b++; - } - return 1; -} - /* copy a string to a safe spot */ /* @@ -1518,14 +1426,17 @@ Perl_mess_sv(pTHX_ SV *basemsg, bool consume) * from the sibling of PL_curcop. */ - const COP *cop = - closest_cop(PL_curcop, OpSIBLING(PL_curcop), PL_op, FALSE); - if (!cop) - cop = PL_curcop; + if (PL_curcop) { + const COP *cop = + closest_cop(PL_curcop, OpSIBLING(PL_curcop), PL_op, FALSE); + if (!cop) + cop = PL_curcop; + + if (CopLINE(cop)) + Perl_sv_catpvf(aTHX_ sv, " at %s line %" IVdf, + OutCopFILE(cop), (IV)CopLINE(cop)); + } - if (CopLINE(cop)) - Perl_sv_catpvf(aTHX_ sv, " at %s line %" IVdf, - OutCopFILE(cop), (IV)CopLINE(cop)); /* Seems that GvIO() can be untrustworthy during global destruction. */ if (GvIO(PL_last_in_gv) && (SvTYPE(GvIOp(PL_last_in_gv)) == SVt_PVIO) && IoLINES(GvIOp(PL_last_in_gv))) @@ -2546,10 +2457,9 @@ Perl_my_popen_list(pTHX_ const char *mode, int n, SV **args) if (did_pipes && pid > 0) { int errkid; unsigned n = 0; - SSize_t n1; while (n < sizeof(int)) { - n1 = PerlLIO_read(pp[0], + const SSize_t n1 = PerlLIO_read(pp[0], (void*)(((char*)&errkid)+n), (sizeof(int)) - n); if (n1 <= 0) @@ -2704,10 +2614,9 @@ Perl_my_popen(pTHX_ const char *cmd, const char *mode) if (did_pipes && pid > 0) { int errkid; unsigned n = 0; - SSize_t n1; while (n < sizeof(int)) { - n1 = PerlLIO_read(pp[0], + const SSize_t n1 = PerlLIO_read(pp[0], (void*)(((char*)&errkid)+n), (sizeof(int)) - n); if (n1 <= 0) @@ -5244,8 +5153,13 @@ Perl_my_snprintf(char *buffer, const Size_t len, const char *format, ...) if (qfmt) { /* If the format looked promising, use it as quadmath. */ retval = quadmath_snprintf(buffer, len, qfmt, va_arg(ap, NV)); - if (retval == -1) + if (retval == -1) { + if (qfmt != format) { + dTHX; + SAVEFREEPV(qfmt); + } Perl_croak_nocontext("panic: quadmath_snprintf failed, format \"%s\"", qfmt); + } quadmath_valid = TRUE; if (qfmt != format) Safefree(qfmt); diff --git a/vms/descrip_mms.template b/vms/descrip_mms.template index bc1ebec..935b931 100644 --- a/vms/descrip_mms.template +++ b/vms/descrip_mms.template @@ -307,7 +307,7 @@ utils : $(utils1) $(utils2) $(utils3) $(utils4) $(utils5) extra.pods : miniperl @ @extra_pods.com -PERLDELTA_CURRENT = [.pod]perl5259delta.pod +PERLDELTA_CURRENT = [.pod]perl52510delta.pod $(PERLDELTA_CURRENT) : [.pod]perldelta.pod Copy/NoConfirm/Log $(MMS$SOURCE) $(PERLDELTA_CURRENT) diff --git a/warnings.h b/warnings.h index 6d67520..0166837 100644 --- a/warnings.h +++ b/warnings.h @@ -221,7 +221,8 @@ is by default enabled even if not within the scope of S>. #define unpackWARN4(x) (((x) >>24) & 0xFF) #define ckDEAD(x) \ - ( ! specialWARN(PL_curcop->cop_warnings) && \ + (PL_curcop && \ + !specialWARN(PL_curcop->cop_warnings) && \ ( isWARNf_on(PL_curcop->cop_warnings, WARN_ALL) || \ isWARNf_on(PL_curcop->cop_warnings, unpackWARN1(x)) || \ isWARNf_on(PL_curcop->cop_warnings, unpackWARN2(x)) || \ diff --git a/win32/GNUmakefile b/win32/GNUmakefile index f188940..7a2e761 100644 --- a/win32/GNUmakefile +++ b/win32/GNUmakefile @@ -63,7 +63,7 @@ INST_TOP := $(INST_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -#INST_VER := \5.25.9 +#INST_VER := \5.25.10 # # Comment this out if you DON'T want your perl installation to have @@ -173,6 +173,10 @@ USE_LARGE_FILES := define #CCTYPE := MSVC120 # Visual C++ 2013 Express Edition (aka Visual C++ 12.0) (free version) #CCTYPE := MSVC120FREE +# Visual C++ 2015 (aka Visual C++ 14.0) (full version) +#CCTYPE := MSVC140 +# Visual C++ 2015 Express Edition (aka Visual C++ 14.0) (free version) +#CCTYPE := MSVC140FREE # MinGW or mingw-w64 with gcc-3.4.5 or later #CCTYPE := GCC @@ -619,7 +623,13 @@ DEFINES = -DWIN32 -D_CONSOLE -DNO_STRICT LOCDEFS = -DPERLDLL -DPERL_CORE CXX_FLAG = -TP -EHsc +ifeq ($(CCTYPE),MSVC140) +LIBC = ucrt.lib +else ifeq ($(CCTYPE),MSVC140FREE) +LIBC = ucrt.lib +else LIBC = msvcrt.lib +endif ifeq ($(CFG),Debug) OPTIMIZE = -Od -MD -Zi -DDEBUGGING @@ -628,7 +638,13 @@ else ifeq ($(CFG),DebugSymbols) OPTIMIZE = -Od -MD -Zi LINK_DBG = -debug else ifeq ($(CFG),DebugFull) +ifeq ($(CCTYPE),MSVC140) +LIBC = ucrtd.lib +else ifeq ($(CCTYPE),MSVC140FREE) +LIBC = ucrtd.lib +else LIBC = msvcrtd.lib +endif OPTIMIZE = -Od -MDd -Zi -D_DEBUG -DDEBUGGING LINK_DBG = -debug else @@ -661,6 +677,13 @@ ifeq ($(PREMSVC80),undef) DEFINES += -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE endif +# Likewise for deprecated Winsock APIs in VC++ 14.0 for now. +ifeq ($(CCTYPE),MSVC140) +DEFINES = $(DEFINES) -D_WINSOCK_DEPRECATED_NO_WARNINGS +else ifeq ($(CCTYPE),MSVC140FREE) +DEFINES = $(DEFINES) -D_WINSOCK_DEPRECATED_NO_WARNINGS +endif + # In VS 2005 (VC++ 8.0) Microsoft changes time_t from 32-bit to # 64-bit, even in 32-bit mode. It also provides the _USE_32BIT_TIME_T # preprocessor option to revert back to the old functionality for @@ -680,6 +703,20 @@ LIBBASEFILES = oldnames.lib kernel32.lib user32.lib gdi32.lib winspool.lib \ netapi32.lib uuid.lib ws2_32.lib mpr.lib winmm.lib version.lib \ odbc32.lib odbccp32.lib comctl32.lib +ifeq ($(CCTYPE),MSVC140) +ifeq ($(CFG),DebugFull) +LIBBASEFILES += msvcrtd.lib vcruntimed.lib +else +LIBBASEFILES += msvcrt.lib vcruntime.lib +endif +else ifeq ($(CCTYPE),MSVC140FREE) +ifeq ($(CFG),DebugFull) +LIBBASEFILES += msvcrtd.lib vcruntimed.lib +else +LIBBASEFILES += msvcrt.lib vcruntime.lib +endif +endif + # Avoid __intel_new_proc_init link error for libircmt. # libmmd is /MD equivelent, other variants exist. # libmmd is Intel C's math addon funcs to MS CRT, contains long doubles, C99, @@ -1240,6 +1277,27 @@ $(MINIDIR)\.exists : $(CFGH_TMPL) echo #undef NVff&& \ echo #undef NVgf&& \ echo #undef USE_LONG_DOUBLE)>> config.h +ifeq ($(CCTYPE),MSVC140) + @(echo #undef FILE_ptr&& \ + echo #undef FILE_cnt&& \ + echo #undef FILE_base&& \ + echo #undef FILE_bufsiz&& \ + echo #define FILE_ptr(fp) PERLIO_FILE_ptr(fp)&& \ + echo #define FILE_cnt(fp) PERLIO_FILE_cnt(fp)&& \ + echo #define FILE_base(fp) PERLIO_FILE_base(fp)&& \ + echo #define FILE_bufsiz(fp) (PERLIO_FILE_cnt(fp) + PERLIO_FILE_ptr(fp) - PERLIO_FILE_base(fp))&& \ + echo #define I_STDBOOL)>> config.h +else ifeq ($(CCTYPE),MSVC140FREE) + @(echo #undef FILE_ptr&& \ + echo #undef FILE_cnt&& \ + echo #undef FILE_base&& \ + echo #undef FILE_bufsiz&& \ + echo #define FILE_ptr(fp) PERLIO_FILE_ptr(fp)&& \ + echo #define FILE_cnt(fp) PERLIO_FILE_cnt(fp)&& \ + echo #define FILE_base(fp) PERLIO_FILE_base(fp)&& \ + echo #define FILE_bufsiz(fp) (PERLIO_FILE_cnt(fp) + PERLIO_FILE_ptr(fp) - PERLIO_FILE_base(fp))&& \ + echo #define I_STDBOOL)>> config.h +endif ifeq ($(USE_LARGE_FILES),define) @(echo #define Off_t $(INT64)&& \ echo #define LSEEKSIZE ^8&& \ @@ -1571,7 +1629,7 @@ utils: $(HAVEMINIPERL) ..\utils\Makefile copy ..\README.tw ..\pod\perltw.pod copy ..\README.vos ..\pod\perlvos.pod copy ..\README.win32 ..\pod\perlwin32.pod - copy ..\pod\perldelta.pod ..\pod\perl5259delta.pod + copy ..\pod\perldelta.pod ..\pod\perl52510delta.pod $(MINIPERL) -I..\lib $(PL2BAT) $(UTILS) $(MINIPERL) -I..\lib ..\autodoc.pl .. $(MINIPERL) -I..\lib ..\pod\perlmodlib.PL -q .. @@ -1668,7 +1726,7 @@ distclean: realclean -if exist $(LIBDIR)\Win32API rmdir /s /q $(LIBDIR)\Win32API -if exist $(LIBDIR)\XS rmdir /s /q $(LIBDIR)\XS -cd $(PODDIR) && del /f *.html *.bat roffitall \ - perl5259delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ + perl52510delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ perlapi.pod perlbs2000.pod perlce.pod perlcn.pod perlcygwin.pod \ perldos.pod perlfreebsd.pod perlhaiku.pod perlhpux.pod \ perlhurd.pod perlintern.pod perlirix.pod perljp.pod perlko.pod \ diff --git a/win32/Makefile b/win32/Makefile index 1aa0e45..6c8f7f6 100644 --- a/win32/Makefile +++ b/win32/Makefile @@ -38,7 +38,7 @@ INST_TOP = $(INST_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -#INST_VER = \5.25.9 +#INST_VER = \5.25.10 # # Comment this out if you DON'T want your perl installation to have @@ -133,6 +133,10 @@ CCTYPE = MSVC60 #CCTYPE = MSVC120 # Visual C++ 2013 Express Edition (aka Visual C++ 12.0) (free version) #CCTYPE = MSVC120FREE +# Visual C++ 2015 (aka Visual C++ 14.0) (full version) +#CCTYPE = MSVC140 +# Visual C++ 2015 Express Edition (aka Visual C++ 14.0) (free version) +#CCTYPE = MSVC140FREE # # If you are using Intel C++ Compiler uncomment this @@ -467,7 +471,11 @@ DEFINES = -DWIN32 -D_CONSOLE -DNO_STRICT LOCDEFS = -DPERLDLL -DPERL_CORE CXX_FLAG = -TP -EHsc +!IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" +LIBC = ucrt.lib +!ELSE LIBC = msvcrt.lib +!ENDIF !IF "$(CFG)" == "Debug" OPTIMIZE = -Od -MD -Zi -DDEBUGGING @@ -478,7 +486,11 @@ OPTIMIZE = -Od -MD -Zi LINK_DBG = -debug !ELSE !IF "$(CFG)" == "DebugFull" +!IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" +LIBC = ucrtd.lib +!ELSE LIBC = msvcrtd.lib +!ENDIF OPTIMIZE = -Od -MDd -Zi -D_DEBUG -DDEBUGGING LINK_DBG = -debug !ELSE @@ -513,6 +525,11 @@ OPTIMIZE = $(OPTIMIZE) -fp:precise DEFINES = $(DEFINES) -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE !ENDIF +# Likewise for deprecated Winsock APIs in VC++ 14.0 for now. +!IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" +DEFINES = $(DEFINES) -D_WINSOCK_DEPRECATED_NO_WARNINGS +!ENDIF + # In VS 2005 (VC++ 8.0) Microsoft changes time_t from 32-bit to # 64-bit, even in 32-bit mode. It also provides the _USE_32BIT_TIME_T # preprocessor option to revert back to the old functionality for @@ -533,6 +550,14 @@ LIBBASEFILES = \ netapi32.lib uuid.lib ws2_32.lib mpr.lib winmm.lib \ version.lib odbc32.lib odbccp32.lib comctl32.lib +!IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" +! IF "$(CFG)" == "DebugFull" +LIBBASEFILES = $(LIBBASEFILES) msvcrtd.lib vcruntimed.lib +! ELSE +LIBBASEFILES = $(LIBBASEFILES) msvcrt.lib vcruntime.lib +! ENDIF +!ENDIF + # Avoid __intel_new_proc_init link error for libircmt. # libmmd is /MD equivelent, other variants exist. # libmmd is Intel C's math addon funcs to MS CRT, contains long doubles, C99, @@ -928,6 +953,17 @@ perlglob$(o) : perlglob.c @echo.>>$@ @echo #ifndef _config_h_footer_>>$@ @echo #define _config_h_footer_>>$@ +!IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" + @echo #undef FILE_ptr>>$@ + @echo #define FILE_ptr(fp) PERLIO_FILE_ptr(fp)>>$@ + @echo #undef FILE_cnt>>$@ + @echo #define FILE_cnt(fp) PERLIO_FILE_cnt(fp)>>$@ + @echo #undef FILE_base>>$@ + @echo #define FILE_base(fp) PERLIO_FILE_base(fp)>>$@ + @echo #undef FILE_bufsiz>>$@ + @echo #define FILE_bufsiz(fp) (PERLIO_FILE_cnt(fp) + PERLIO_FILE_ptr(fp) - PERLIO_FILE_base(fp))>>$@ + @echo #define I_STDBOOL>>$@ +!ENDIF @echo #undef Off_t>>$@ @echo #undef LSEEKSIZE>>$@ @echo #undef Off_t_size>>$@ @@ -1213,7 +1249,7 @@ utils: $(PERLEXE) ..\utils\Makefile copy ..\README.tw ..\pod\perltw.pod copy ..\README.vos ..\pod\perlvos.pod copy ..\README.win32 ..\pod\perlwin32.pod - copy ..\pod\perldelta.pod ..\pod\perl5259delta.pod + copy ..\pod\perldelta.pod ..\pod\perl52510delta.pod cd ..\win32 $(PERLEXE) $(PL2BAT) $(UTILS) $(MINIPERL) -I..\lib ..\autodoc.pl .. @@ -1312,7 +1348,7 @@ distclean: realclean -if exist $(LIBDIR)\Win32API rmdir /s /q $(LIBDIR)\Win32API -if exist $(LIBDIR)\XS rmdir /s /q $(LIBDIR)\XS -cd $(PODDIR) && del /f *.html *.bat roffitall \ - perl5259delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ + perl52510delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ perlapi.pod perlbs2000.pod perlce.pod perlcn.pod perlcygwin.pod \ perldos.pod perlfreebsd.pod perlhaiku.pod perlhpux.pod \ perlhurd.pod perlintern.pod perlirix.pod perljp.pod perlko.pod \ diff --git a/win32/config_sh.PL b/win32/config_sh.PL index c4a3112..8d6f738 100644 --- a/win32/config_sh.PL +++ b/win32/config_sh.PL @@ -277,6 +277,13 @@ if ($opt{cc} =~ /\bcl/ and $opt{ccversion} =~ /^(\d+)/) { if($ccversion < 13) { #VC6 $opt{ar} ='lib'; } + if ($ccversion >= 19) { # VC14 + $opt{stdio_base} = 'PERLIO_FILE_base(fp)'; + $opt{stdio_bufsiz} = '(PERLIO_FILE_cnt(fp) + PERLIO_FILE_ptr(fp) - PERLIO_FILE_base(fp))'; + $opt{stdio_cnt} = 'PERLIO_FILE_cnt(fp)'; + $opt{stdio_ptr} = 'PERLIO_FILE_ptr(fp)'; + $opt{i_stdbool} = 'define'; + } } #find out which MSVC this ICC is using elsif ($opt{cc} =~ /\bicl/) { @@ -286,6 +293,13 @@ elsif ($opt{cc} =~ /\bicl/) { $opt{sGMTIME_max} = 32535291599; $opt{sLOCALTIME_max} = 32535244799; } + if ($num_ver =~ /^(\d+)/ && $1 >= 19) { # VC14 + $opt{stdio_base} = 'PERLIO_FILE_base(fp)'; + $opt{stdio_bufsiz} = '(PERLIO_FILE_cnt(fp) + PERLIO_FILE_ptr(fp) - PERLIO_FILE_base(fp))'; + $opt{stdio_cnt} = 'PERLIO_FILE_cnt(fp)'; + $opt{stdio_ptr} = 'PERLIO_FILE_ptr(fp)'; + $opt{i_stdbool} = 'define'; + } $opt{ar} ='xilib'; } diff --git a/win32/makefile.mk b/win32/makefile.mk index 931ee5e..318b13f 100644 --- a/win32/makefile.mk +++ b/win32/makefile.mk @@ -44,7 +44,7 @@ INST_TOP *= $(INST_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -#INST_VER *= \5.25.9 +#INST_VER *= \5.25.10 # # Comment this out if you DON'T want your perl installation to have @@ -145,6 +145,10 @@ USE_LARGE_FILES *= define #CCTYPE = MSVC120 # Visual C++ 2013 Express Edition (aka Visual C++ 12.0) (free version) #CCTYPE = MSVC120FREE +# Visual C++ 2015 (aka Visual C++ 14.0) (full version) +#CCTYPE = MSVC140 +# Visual C++ 2015 Express Edition (aka Visual C++ 14.0) (free version) +#CCTYPE = MSVC140FREE # MinGW or mingw-w64 with gcc-3.4.5 or later #CCTYPE = GCC @@ -606,7 +610,11 @@ DEFINES = -DWIN32 -D_CONSOLE -DNO_STRICT LOCDEFS = -DPERLDLL -DPERL_CORE CXX_FLAG = -TP -EHsc +.IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" +LIBC = ucrt.lib +.ELSE LIBC = msvcrt.lib +.ENDIF .IF "$(CFG)" == "Debug" OPTIMIZE = -Od -MD -Zi -DDEBUGGING @@ -615,7 +623,11 @@ LINK_DBG = -debug OPTIMIZE = -Od -MD -Zi LINK_DBG = -debug .ELIF "$(CFG)" == "DebugFull" +.IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" +LIBC = ucrtd.lib +.ELSE LIBC = msvcrtd.lib +.ENDIF OPTIMIZE = -Od -MDd -Zi -D_DEBUG -DDEBUGGING LINK_DBG = -debug .ELSE @@ -648,6 +660,11 @@ OPTIMIZE += -fp:precise DEFINES += -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE .ENDIF +# Likewise for deprecated Winsock APIs in VC++ 14.0 for now. +.IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" +DEFINES = $(DEFINES) -D_WINSOCK_DEPRECATED_NO_WARNINGS +.ENDIF + # In VS 2005 (VC++ 8.0) Microsoft changes time_t from 32-bit to # 64-bit, even in 32-bit mode. It also provides the _USE_32BIT_TIME_T # preprocessor option to revert back to the old functionality for @@ -667,6 +684,14 @@ LIBBASEFILES = oldnames.lib kernel32.lib user32.lib gdi32.lib winspool.lib \ netapi32.lib uuid.lib ws2_32.lib mpr.lib winmm.lib version.lib \ odbc32.lib odbccp32.lib comctl32.lib +.IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" +.IF "$(CFG)" == "DebugFull" +LIBBASEFILES += msvcrtd.lib vcruntimed.lib +.ELSE +LIBBASEFILES += msvcrt.lib vcruntime.lib +.ENDIF +.ENDIF + # Avoid __intel_new_proc_init link error for libircmt. # libmmd is /MD equivelent, other variants exist. # libmmd is Intel C's math addon funcs to MS CRT, contains long doubles, C99, @@ -1208,6 +1233,17 @@ $(MINIDIR)\.exists : $(CFGH_TMPL) echo #undef NVgf&& \ echo #undef USE_LONG_DOUBLE&& \ echo #undef USE_CPLUSPLUS)>> config.h +.IF "$(CCTYPE)" == "MSVC140" || "$(CCTYPE)" == "MSVC140FREE" + @(echo #undef FILE_ptr&& \ + echo #undef FILE_cnt&& \ + echo #undef FILE_base&& \ + echo #undef FILE_bufsiz&& \ + echo #define FILE_ptr(fp) PERLIO_FILE_ptr(fp)&& \ + echo #define FILE_cnt(fp) PERLIO_FILE_cnt(fp)&& \ + echo #define FILE_base(fp) PERLIO_FILE_base(fp)&& \ + echo #define FILE_bufsiz(fp) (PERLIO_FILE_cnt(fp) + PERLIO_FILE_ptr(fp) - PERLIO_FILE_base(fp))&& \ + echo #define I_STDBOOL)>> config.h +.ENDIF .IF "$(USE_LARGE_FILES)"=="define" @(echo #define Off_t $(INT64)&& \ echo #define LSEEKSIZE ^8&& \ @@ -1530,7 +1566,7 @@ utils: $(HAVEMINIPERL) ..\utils\Makefile copy ..\README.tw ..\pod\perltw.pod copy ..\README.vos ..\pod\perlvos.pod copy ..\README.win32 ..\pod\perlwin32.pod - copy ..\pod\perldelta.pod ..\pod\perl5259delta.pod + copy ..\pod\perldelta.pod ..\pod\perl52510delta.pod $(MINIPERL) -I..\lib $(PL2BAT) $(UTILS) $(MINIPERL) -I..\lib ..\autodoc.pl .. $(MINIPERL) -I..\lib ..\pod\perlmodlib.PL -q .. @@ -1628,7 +1664,7 @@ distclean: realclean -if exist $(LIBDIR)\Win32API rmdir /s /q $(LIBDIR)\Win32API -if exist $(LIBDIR)\XS rmdir /s /q $(LIBDIR)\XS -cd $(PODDIR) && del /f *.html *.bat roffitall \ - perl5259delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ + perl52510delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ perlapi.pod perlbs2000.pod perlce.pod perlcn.pod perlcygwin.pod \ perldos.pod perlfreebsd.pod perlhaiku.pod perlhpux.pod \ perlhurd.pod perlintern.pod perlirix.pod perljp.pod perlko.pod \ diff --git a/win32/perlhost.h b/win32/perlhost.h index 9963319..84b08c9 100644 --- a/win32/perlhost.h +++ b/win32/perlhost.h @@ -836,15 +836,15 @@ PerlStdIOFdupopen(struct IPerlStdIO* piPerl, FILE* pf) int fileno = win32_dup(win32_fileno(pf)); /* open the file in the same mode */ - if((pf)->_flag & _IOREAD) { + if (PERLIO_FILE_flag(pf) & PERLIO_FILE_flag_RD) { mode[0] = 'r'; mode[1] = 0; } - else if((pf)->_flag & _IOWRT) { + else if (PERLIO_FILE_flag(pf) & PERLIO_FILE_flag_WR) { mode[0] = 'a'; mode[1] = 0; } - else if((pf)->_flag & _IORW) { + else if (PERLIO_FILE_flag(pf) & PERLIO_FILE_flag_RW) { mode[0] = 'r'; mode[1] = '+'; mode[2] = 0; diff --git a/win32/pod.mak b/win32/pod.mak index e877895..2917d20 100644 --- a/win32/pod.mak +++ b/win32/pod.mak @@ -47,6 +47,7 @@ POD = perl.pod \ perl5240delta.pod \ perl5241delta.pod \ perl5250delta.pod \ + perl52510delta.pod \ perl5251delta.pod \ perl5252delta.pod \ perl5253delta.pod \ @@ -198,6 +199,7 @@ MAN = perl.man \ perl5240delta.man \ perl5241delta.man \ perl5250delta.man \ + perl52510delta.man \ perl5251delta.man \ perl5252delta.man \ perl5253delta.man \ @@ -349,6 +351,7 @@ HTML = perl.html \ perl5240delta.html \ perl5241delta.html \ perl5250delta.html \ + perl52510delta.html \ perl5251delta.html \ perl5252delta.html \ perl5253delta.html \ @@ -500,6 +503,7 @@ TEX = perl.tex \ perl5240delta.tex \ perl5241delta.tex \ perl5250delta.tex \ + perl52510delta.tex \ perl5251delta.tex \ perl5252delta.tex \ perl5253delta.tex \ diff --git a/win32/win32.c b/win32/win32.c index 6ac73e2..3981921 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -4161,15 +4161,15 @@ win32_fdupopen(FILE *pf) int fileno = win32_dup(win32_fileno(pf)); /* open the file in the same mode */ - if((pf)->_flag & _IOREAD) { + if (PERLIO_FILE_flag(pf) & PERLIO_FILE_flag_RD) { mode[0] = 'r'; mode[1] = 0; } - else if((pf)->_flag & _IOWRT) { + else if (PERLIO_FILE_flag(pf) & PERLIO_FILE_flag_WR) { mode[0] = 'a'; mode[1] = 0; } - else if((pf)->_flag & _IORW) { + else if (PERLIO_FILE_flag(pf) & PERLIO_FILE_flag_RW) { mode[0] = 'r'; mode[1] = '+'; mode[2] = 0; diff --git a/win32/win32.h b/win32/win32.h index 9b79e00..6de9c9b 100644 --- a/win32/win32.h +++ b/win32/win32.h @@ -262,13 +262,13 @@ typedef unsigned short mode_t; #define snprintf _snprintf #define vsnprintf _vsnprintf -/* on VC2003, msvcrt.lib is missing these symbols */ +/* on VS2003, msvcrt.lib is missing these symbols */ #if _MSC_VER >= 1300 && _MSC_VER < 1400 # pragma intrinsic(_rotl64,_rotr64) #endif -# pragma warning(push) -# pragma warning(disable:4756;disable:4056) +#pragma warning(push) +#pragma warning(disable:4756;disable:4056) PERL_STATIC_INLINE double S_Infinity() { /* this is a real C literal which can get further constant folded @@ -277,8 +277,8 @@ double S_Infinity() { folding INF is creating -INF */ return (DBL_MAX+DBL_MAX); } -# pragma warning(pop) -# define NV_INF S_Infinity() +#pragma warning(pop) +#define NV_INF S_Infinity() /* selectany allows duplicate and unused data symbols to be removed by VC linker, if this were static, each translation unit will have its own, @@ -290,10 +290,64 @@ double S_Infinity() { that DLL actually uses __PL_nan_u */ extern const __declspec(selectany) union { unsigned __int64 __q; double __d; } __PL_nan_u = { 0x7FF8000000000000UI64 }; -# define NV_NAN ((NV)__PL_nan_u.__d) +#define NV_NAN ((NV)__PL_nan_u.__d) + +/* The CRT was rewritten in VS2015. */ +#if _MSC_VER >= 1900 + +/* No longer declared in stdio.h */ +char *gets(char* buffer); + +#define tzname _tzname + +/* From corecrt_internal_stdio.h: */ +typedef struct +{ + union + { + FILE _public_file; + char* _ptr; + }; + + char* _base; + int _cnt; + long _flags; + long _file; + int _charbuf; + int _bufsiz; + char* _tmpfname; + CRITICAL_SECTION _lock; +} __crt_stdio_stream_data; + +#define PERLIO_FILE_flag_RD 0x0001 /* _IOREAD */ +#define PERLIO_FILE_flag_WR 0x0002 /* _IOWRITE */ +#define PERLIO_FILE_flag_RW 0x0004 /* _IOUPDATE */ +#define PERLIO_FILE_ptr(f) (((__crt_stdio_stream_data*)(f))->_ptr) +#define PERLIO_FILE_base(f) (((__crt_stdio_stream_data*)(f))->_base) +#define PERLIO_FILE_cnt(f) (((__crt_stdio_stream_data*)(f))->_cnt) +#define PERLIO_FILE_flag(f) ((int)(((__crt_stdio_stream_data*)(f))->_flags)) +#define PERLIO_FILE_file(f) ((int)(((__crt_stdio_stream_data*)(f))->_file)) + +#endif #endif /* _MSC_VER */ +#if (!defined(_MSC_VER)) || (defined(_MSC_VER) && _MSC_VER < 1900) + +/* Note: PERLIO_FILE_ptr/base/cnt are not actually used for GCC or _ptr) +#define PERLIO_FILE_base(f) ((f)->_base) +#define PERLIO_FILE_cnt(f) ((f)->_cnt) +#define PERLIO_FILE_flag(f) ((f)->_flag) +#define PERLIO_FILE_file(f) ((f)->_file) + +#endif + #ifdef __MINGW32__ /* Minimal Gnu-Win32 */ typedef long uid_t; @@ -545,21 +599,31 @@ void win32_wait_for_children(pTHX); # define PERL_WAIT_FOR_CHILDREN win32_wait_for_children(aTHX) #endif +/* The following ioinfo struct manipulations had been removed but were + * reinstated to fix RT#120091/118059. However, they do not work with + * the rewritten CRT in VS2015 so they are removed once again for VS2015 + * onwards, which will therefore suffer from the reintroduction of the + * close socket bug. */ +#if (!defined(_MSC_VER)) || (defined(_MSC_VER) && _MSC_VER < 1900) + #ifdef PERL_CORE + /* C doesn't like repeat struct definitions */ #if defined(__MINGW32__) && (__MINGW32_MAJOR_VERSION>=3) -#undef _CRTIMP +# undef _CRTIMP #endif #ifndef _CRTIMP -#define _CRTIMP __declspec(dllimport) +# define _CRTIMP __declspec(dllimport) #endif -/* VV 2005 has multiple ioinfo struct definitions through VC 2005's release life - * VC 2008-2012 have been stable but do not assume future VCs will have the +/* VS2005 has multiple ioinfo struct definitions through VS2005's release life + * VS2008-2012 have been stable but do not assume future VSs will have the * same ioinfo struct, just because past struct stability. If research is done - * on the CRTs of future VS, the version check can be bumped up so the newer - * VC uses a fixed ioinfo size. + * on the CRTs of future VSs, the version check can be bumped up so the newer + * VS uses a fixed ioinfo size. (Actually, only VS2013 (_MSC_VER 1800) hasn't + * been looked at; after that we cannot use the ioinfo struct anyway (see the + * #if above).) */ #if ! (_MSC_VER < 1400 || (_MSC_VER >= 1500 && _MSC_VER <= 1700) \ || defined(__MINGW32__)) @@ -582,7 +646,7 @@ typedef struct { # if _MSC_VER >= 1400 && _MSC_VER < 1500 # error "This ioinfo struct is incomplete for Visual C 2005" # endif -/* VC 2005 CRT has at least 3 different definitions of this struct based on the +/* VS2005 CRT has at least 3 different definitions of this struct based on the * CRT DLL's build number. */ # if _MSC_VER >= 1500 # ifndef _SAFECRT_IMPL @@ -636,9 +700,12 @@ EXTERN_C _CRTIMP ioinfo* __pioinfo[]; #endif /* since we are not doing a dup2(), this works fine */ -# define _set_osfhnd(fh, osfh) (void)(_osfhnd(fh) = (intptr_t)osfh) +#define _set_osfhnd(fh, osfh) (void)(_osfhnd(fh) = (intptr_t)osfh) + #endif /* PERL_CORE */ +#endif /* !defined(_MSC_VER) || _MSC_VER<1900 */ + /* IO.xs and POSIX.xs define PERLIO_NOT_STDIO to 1 */ #if defined(PERL_EXT_IO) || defined(PERL_EXT_POSIX) #undef PERLIO_NOT_STDIO diff --git a/win32/win32sck.c b/win32/win32sck.c index 8eba4cd..d9d7f36 100644 --- a/win32/win32sck.c +++ b/win32/win32sck.c @@ -694,10 +694,15 @@ int my_close(int fd) int err; err = closesocket(osf); if (err == 0) { +#ifdef _set_osfhnd assert(_osfhnd(fd) == osf); /* catch a bad ioinfo struct def */ /* don't close freed handle */ _set_osfhnd(fd, INVALID_HANDLE_VALUE); return close(fd); +#else + (void)close(fd); /* handle already closed, ignore error */ + return 0; +#endif } else if (err == SOCKET_ERROR) { int wsaerr = WSAGetLastError(); @@ -726,10 +731,15 @@ my_fclose (FILE *pf) win32_fflush(pf); err = closesocket(osf); if (err == 0) { +#ifdef _set_osfhnd assert(_osfhnd(win32_fileno(pf)) == osf); /* catch a bad ioinfo struct def */ /* don't close freed handle */ _set_osfhnd(win32_fileno(pf), INVALID_HANDLE_VALUE); return fclose(pf); +#else + (void)fclose(pf); /* handle already closed, ignore error */ + return 0; +#endif } else if (err == SOCKET_ERROR) { int wsaerr = WSAGetLastError();