From d7d9edaf02b2a322c1147136a01d5cfc96482ab1 Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Wed, 28 Jun 2017 10:44:00 +0900 Subject: [PATCH] Imported Upstream version 5.23.6 Change-Id: Ie310320f920edd66f731e1c79cfb4ee2ac84cb41 Signed-off-by: DongHun Kwak --- .dir-locals.el | 2 +- AUTHORS | 1 + Configure | 63 +- Cross/config.sh-arm-linux | 40 +- Cross/config.sh-arm-linux-n770 | 40 +- INSTALL | 30 +- MANIFEST | 15 +- META.json | 3 +- META.yml | 5 +- Makefile.SH | 8 +- NetWare/Makefile | 4 +- NetWare/config_H.wc | 10 +- Porting/Glossary | 20 +- Porting/Maintainers.pl | 39 +- Porting/checkAUTHORS.pl | 1 + Porting/config.sh | 42 +- Porting/config_H | 18 +- Porting/deparse-skips.txt | 2 +- Porting/epigraphs.pod | 111 ++ Porting/perldelta_template.pod | 2 +- Porting/release_schedule.pod | 52 +- Porting/todo.pod | 4 +- README.haiku | 4 +- README.macosx | 8 +- README.os2 | 2 +- README.vms | 4 +- amigaos4/amigaio.c | 15 +- amigaos4/config.sh | 1 + autodoc.pl | 4 +- caretx.c | 138 +-- cflags.SH | 23 + charclass_invlists.h | 4 +- config_h.SH | 15 +- .../lib/CPAN/Meta/Requirements.pm | 187 +++- cpan/CPAN-Meta-Requirements/t/accepts.t | 22 +- cpan/CPAN-Meta-Requirements/t/bad_version_hook.t | 13 +- cpan/CPAN-Meta-Requirements/t/basic.t | 34 +- cpan/CPAN-Meta-Requirements/t/finalize.t | 2 + cpan/CPAN-Meta-Requirements/t/from-hash.t | 13 +- cpan/CPAN-Meta-Requirements/t/merge.t | 2 + cpan/CPAN-Meta-Requirements/t/strings.t | 2 + cpan/CPAN-Meta-YAML/lib/CPAN/Meta/YAML.pm | 4 +- cpan/Locale-Codes/lib/Locale/Codes.pm | 26 +- cpan/Locale-Codes/lib/Locale/Codes/Changes.pod | 4 +- cpan/Locale-Codes/lib/Locale/Codes/Constants.pm | 2 +- cpan/Locale-Codes/lib/Locale/Codes/Country.pm | 2 +- .../Locale-Codes/lib/Locale/Codes/Country_Codes.pm | 4 +- .../lib/Locale/Codes/Country_Retired.pm | 4 +- cpan/Locale-Codes/lib/Locale/Codes/Currency.pm | 2 +- .../lib/Locale/Codes/Currency_Codes.pm | 4 +- .../lib/Locale/Codes/Currency_Retired.pm | 4 +- cpan/Locale-Codes/lib/Locale/Codes/LangExt.pm | 2 +- .../Locale-Codes/lib/Locale/Codes/LangExt_Codes.pm | 4 +- .../lib/Locale/Codes/LangExt_Retired.pm | 4 +- cpan/Locale-Codes/lib/Locale/Codes/LangFam.pm | 2 +- .../Locale-Codes/lib/Locale/Codes/LangFam_Codes.pm | 4 +- .../lib/Locale/Codes/LangFam_Retired.pm | 2 +- cpan/Locale-Codes/lib/Locale/Codes/LangVar.pm | 2 +- .../Locale-Codes/lib/Locale/Codes/LangVar_Codes.pm | 268 ++--- .../lib/Locale/Codes/LangVar_Retired.pm | 4 +- cpan/Locale-Codes/lib/Locale/Codes/Language.pm | 2 +- .../lib/Locale/Codes/Language_Codes.pm | 4 +- .../lib/Locale/Codes/Language_Retired.pm | 4 +- cpan/Locale-Codes/lib/Locale/Codes/Script.pm | 2 +- cpan/Locale-Codes/lib/Locale/Codes/Script_Codes.pm | 4 +- .../lib/Locale/Codes/Script_Retired.pm | 4 +- cpan/Locale-Codes/lib/Locale/Country.pm | 2 +- cpan/Locale-Codes/lib/Locale/Currency.pm | 2 +- cpan/Locale-Codes/lib/Locale/Language.pm | 2 +- cpan/Locale-Codes/lib/Locale/Script.pm | 2 +- cpan/Locale-Codes/t/code2country.t | 8 + cpan/Math-BigInt-FastCalc/FastCalc.xs | 4 +- .../lib/Math/BigInt/FastCalc.pm | 2 +- cpan/Module-Metadata/lib/Module/Metadata.pm | 6 +- cpan/Module-Metadata/t/extract-version.t | 6 +- cpan/Pod-Perldoc/lib/Pod/Perldoc.pm | 6 +- cpan/Term-ANSIColor/lib/Term/ANSIColor.pm | 24 +- cpan/Text-ParseWords/t/ParseWords.t | 244 ++--- cpan/Unicode-Normalize/Makefile.PL | 1 + cpan/Unicode-Normalize/Normalize.pm | 4 +- cpan/Unicode-Normalize/Normalize.xs | 4 +- dist/Data-Dumper/Dumper.pm | 26 +- dist/Data-Dumper/Dumper.xs | 272 ++--- dist/Data-Dumper/t/trailing_comma.t | 116 ++ dist/ExtUtils-CBuilder/t/00-have-compiler.t | 32 +- dist/ExtUtils-ParseXS/lib/perlxs.pod | 6 +- dist/Module-CoreList/Changes | 6 + dist/Module-CoreList/lib/Module/CoreList.pm | 128 ++- .../lib/Module/CoreList/TieHashDelta.pm | 2 +- dist/Module-CoreList/lib/Module/CoreList/Utils.pm | 16 +- dist/PathTools/Cwd.pm | 2 +- dist/PathTools/lib/File/Spec.pm | 2 +- dist/PathTools/lib/File/Spec/AmigaOS.pm | 2 +- dist/PathTools/lib/File/Spec/Cygwin.pm | 2 +- dist/PathTools/lib/File/Spec/Epoc.pm | 2 +- dist/PathTools/lib/File/Spec/Functions.pm | 2 +- dist/PathTools/lib/File/Spec/Mac.pm | 2 +- dist/PathTools/lib/File/Spec/OS2.pm | 2 +- dist/PathTools/lib/File/Spec/Unix.pm | 2 +- dist/PathTools/lib/File/Spec/VMS.pm | 2 +- dist/PathTools/lib/File/Spec/Win32.pm | 2 +- dist/Test/lib/Test.pm | 13 +- dist/base/Changes | 3 + dist/base/lib/base.pm | 2 +- dist/base/lib/fields.pm | 2 +- .../encoding-warnings/lib/encoding/warnings.pm | 4 +- {cpan => dist}/encoding-warnings/t/1-warning.t | 4 + {cpan => dist}/encoding-warnings/t/2-fatal.t | 4 + {cpan => dist}/encoding-warnings/t/3-normal.t | 0 {cpan => dist}/encoding-warnings/t/4-lexical.t | 4 + dist/threads-shared/lib/threads/shared.pm | 2 +- dist/threads-shared/shared.xs | 3 + dist/threads/lib/threads.pm | 2 +- dist/threads/threads.xs | 37 +- doop.c | 19 +- dquote.c | 4 - dquote_inline.h | 8 +- ebcdic_tables.h | 822 +++++++------- embed.fnc | 21 +- embed.h | 4 + ext/DynaLoader/DynaLoader_pm.PL | 2 +- ext/DynaLoader/dl_dllload.xs | 3 +- ext/Hash-Util-FieldHash/FieldHash.xs | 9 - ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm | 2 +- ext/Opcode/Opcode.pm | 2 +- ext/Opcode/Opcode.xs | 6 +- ext/POSIX/t/is.t | 4 +- ext/POSIX/t/posix.t | 12 +- ext/POSIX/t/time.t | 9 +- ext/POSIX/t/wrappers.t | 2 +- ext/PerlIO-mmap/mmap.pm | 2 +- ext/PerlIO-mmap/mmap.xs | 16 - ext/SDBM_File/SDBM_File.pm | 2 +- ext/SDBM_File/SDBM_File.xs | 2 + ext/XS-APItest/APItest.pm | 2 +- ext/XS-APItest/APItest.xs | 48 + ext/XS-APItest/t/handy.t | 5 +- ext/XS-APItest/t/locale.t | 9 +- ext/XS-APItest/t/utf8.t | 1135 ++++++++++++++++---- ext/re/t/re_funcs_u.t | 9 +- gv.h | 4 + hints/catamount.sh | 4 +- hints/darwin.sh | 57 +- hints/solaris_2.sh | 6 + installhtml | 12 +- intrpvar.h | 4 +- lib/B/Deparse.pm | 28 +- lib/B/Deparse.t | 23 +- lib/B/Op_private.pm | 2 +- lib/Benchmark.pm | 37 +- lib/Benchmark.t | 80 +- lib/locale.pm | 9 +- lib/meta_notation.pm | 11 +- lib/meta_notation.t | 10 +- lib/perl5db.pl | 9 +- lib/perl5db.t | 18 +- lib/strict.pm | 6 +- lib/unicore/mktables | 33 +- lib/utf8.pm | 2 +- lib/utf8_heavy.pl | 4 +- make_ext.pl | 2 +- mg.c | 164 +-- numeric.c | 18 +- op.c | 49 +- op.h | 6 + patchlevel.h | 4 +- perl.h | 185 +++- perlio.c | 51 +- plan9/config.plan9 | 10 +- plan9/config_sh.sample | 38 +- pod/.gitignore | 2 +- pod/perl.pod | 2 + pod/perl5221delta.pod | 338 ++++++ pod/perl5235delta.pod | 420 ++++++++ pod/perldelta.pod | 406 ++++--- pod/perldiag.pod | 85 +- pod/perlebcdic.pod | 27 +- pod/perlfunc.pod | 21 +- pod/perlgit.pod | 271 ++--- pod/perlhist.pod | 4 + pod/perlop.pod | 15 + pod/perlpodspec.pod | 2 +- pod/perlport.pod | 3 - pod/perlre.pod | 40 +- pod/perlref.pod | 4 +- pod/perlreref.pod | 8 +- pod/perlunicode.pod | 18 +- pod/perlvar.pod | 17 +- pp.c | 44 +- pp.h | 41 +- pp_sort.c | 7 + pp_sys.c | 2 - proto.h | 6 + regcharclass.h | 4 +- regcomp.c | 332 ++++-- regcomp.h | 193 +++- regen/charset_translations.pl | 13 +- regen/ebcdic.pl | 26 +- regen/unicode_constants.pl | 16 + regexec.c | 69 +- scope.c | 43 +- sv.c | 41 +- sv.h | 2 +- t/lib/croak/pp_sys | 16 + t/lib/cygwin.t | 20 +- t/lib/warnings/doop | 30 + t/lib/warnings/op | 4 + t/lib/warnings/pp | 8 + t/lib/warnings/utf8 | 174 ++- t/loc_tools.pl | 32 +- t/op/attrs.t | 19 + t/op/bop.t | 8 +- t/op/chop.t | 23 +- t/op/heredoc.t | 11 +- t/op/index.t | 5 +- t/op/int.t | 14 +- t/op/lc.t | 5 - t/op/my.t | 5 + t/op/rand.t | 229 ++-- t/op/sort.t | 13 +- t/op/sprintf2.t | 43 +- t/op/substr.t | 1 + t/op/svleak.t | 26 +- t/op/threads.t | 2 +- t/op/ver.t | 5 +- t/opbasic/qq.t | 1 + t/porting/customized.dat | 4 +- t/porting/known_pod_issues.dat | 2 +- t/re/fold_grind.t | 2 +- t/re/pat_advanced.t | 9 +- t/re/reg_mesg.t | 1 - t/re/speed.t | 6 +- t/re/subst.t | 14 +- t/run/switchC.t | 8 +- t/run/switches.t | 6 +- t/uni/case.pl | 4 +- t/uni/fold.t | 7 +- t/uni/heavy.t | 40 + t/uni/overload.t | 13 +- thread.h | 12 +- toke.c | 31 +- uconfig.h | 17 +- unicode_constants.h | 3 + utf8.c | 661 ++++++++---- utf8.h | 268 +++-- utfebcdic.h | 85 +- util.c | 43 + vms/descrip_mms.template | 2 +- win32/GNUmakefile | 6 +- win32/Makefile | 6 +- win32/config_H.gc | 12 +- win32/config_H.vc | 12 +- win32/makefile.mk | 6 +- win32/perllib.c | 11 - win32/pod.mak | 8 + 255 files changed, 6826 insertions(+), 3130 deletions(-) create mode 100644 dist/Data-Dumper/t/trailing_comma.t rename {cpan => dist}/encoding-warnings/lib/encoding/warnings.pm (98%) rename {cpan => dist}/encoding-warnings/t/1-warning.t (85%) rename {cpan => dist}/encoding-warnings/t/2-fatal.t (85%) rename {cpan => dist}/encoding-warnings/t/3-normal.t (100%) rename {cpan => dist}/encoding-warnings/t/4-lexical.t (88%) create mode 100644 pod/perl5221delta.pod create mode 100644 pod/perl5235delta.pod create mode 100644 t/lib/croak/pp_sys create mode 100644 t/uni/heavy.t diff --git a/.dir-locals.el b/.dir-locals.el index 9e118b6..cf0c842 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -1,5 +1,5 @@ ;; Default settings for all except cpan/ ((nil . ((indent-tabs-mode . nil))) ; all modes - (cperl-mode . ((cperl-indent-level 4))) + (cperl-mode . ((cperl-indent-level . 4))) (c-mode . ((c-indentation-style . bsd) (c-basic-offset . 4)))) diff --git a/AUTHORS b/AUTHORS index d56db73..5391f63 100644 --- a/AUTHORS +++ b/AUTHORS @@ -23,6 +23,7 @@ Aaron B. Dossett Aaron J. Mackey Aaron Priven Aaron Trevena +Achim Gratz Augustina Blair Abe Timmerman Abhijit Menon-Sen diff --git a/Configure b/Configure index 07f0bc1..0e71b4b 100755 --- a/Configure +++ b/Configure @@ -1864,7 +1864,7 @@ esac : run the defines and the undefines, if any, but leave the file out there... touch optdef.sh -grep -q '\\' optdef.sh +grep '\\' optdef.sh >/dev/null 2>&1 if test $? = 0; then echo "Configure does not support \\ in -D arguments" exit 1 @@ -2039,7 +2039,7 @@ true) echo "Fetching answers from $config_sh..." cd .. . $config_sh - . ./optdef.sh + . UU/optdef.sh echo " " . UU/extract rm -rf UU @@ -6956,17 +6956,26 @@ int main() { } #endif /* For alignment 32-bit platforms have the 80 bits in 12 bytes, - * while 64-bits platforms have it in 16 bytes. */ + * while 64-bits platforms have it in 16 bytes. The trailing bytes + * cannot be trusted. */ #if LDBL_MANT_DIG == 64 && (LONGDBLSIZE == 16 || LONGDBLSIZE == 12) - if (b[0] == 0xCD && b[9] == 0xBF && b[10] == 0x00) { + if (b[0] == 0xCD && b[9] == 0xBF) { /* x86 80-bit little-endian, sizeof 12 (ILP32, Solaris x86) * or 16 (LP64, Linux and OS X), 4 or 6 bytes of padding. * Also known as "extended precision". */ printf("3\n"); exit(0); } - if (b[0] == 0xBF && b[9] == 0xCD && b[10] == 0x00) { - /* is there ever big-endian 80-bit, really? */ + if (b[0] == 0xBF && b[9] == 0xCD) { + /* Is there ever big-endian 80-bit, really? + * + * The Motorola 68881 had another "extended precision" format: + * sign:1 exp:15 zero:16 integer:1 mantissa:63 + * for total of 96 bits of bytes. The zero bits were unused. + * See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL" for more details. + * If it ever becomes relevant, this format should be allocated + * a new doublekind code since it's quite different from the Intel x87. + */ printf("4\n"); exit(0); } @@ -6975,17 +6984,35 @@ int main() { /* software "double double", the 106 is 53+53. * but irix thinks it is 107. */ if (b[0] == 0x9A && b[7] == 0x3C && b[8] == 0x9A && b[15] == 0xBF) { - /* double double 128-bit little-endian, + /* double double 128-bit fully little-endian, + * little-endian doubles in little-endian order, * 9a 99 99 99 99 99 59 3c 9a 99 99 99 99 99 b9 bf */ printf("5\n"); exit(0); } if (b[0] == 0xBF && b[7] == 0x9A && b[8] == 0x3C && b[15] == 0x9A) { - /* double double 128-bit big-endian, e.g. PPC/Power and MIPS: + /* double double 128-bit fully big-endian, + * big-endian doubles in big-endian order, + * e.g. PPC/Power and MIPS: * bf b9 99 99 99 99 99 9a 3c 59 99 99 99 99 99 9a */ printf("6\n"); exit(0); } + if (b[0] == 0x9A && b[7] == 0xBF && b[8] == 0x9A && b[15] == 0x3C) { + /* double double 128-bit mixed endian. + * little-endian doubles in big-endian order, + * e.g. ppc64el, + * 9a 99 99 99 99 99 b9 bf 9a 99 99 99 99 99 59 3c */ + printf("7\n"); + exit(0); + } + if (b[0] == 0x3C && b[7] == 0x9A && b[8] == 0xBF && b[15] == 0x9A) { + /* double double 128-bit mixed endian, + * big-endian doubles in little-endian order, + * 3c 59 99 99 99 99 99 9a bf b9 99 99 99 99 99 9a */ + printf("8\n"); + exit(0); + } #endif printf("-1\n"); /* unknown */ exit(0); @@ -7006,8 +7033,10 @@ case "$longdblkind" in 2) echo "You have IEEE 754 128-bit big endian long doubles." >&4 ;; 3) echo "You have x86 80-bit little endian long doubles." >& 4 ;; 4) echo "You have x86 80-bit big endian long doubles." >& 4 ;; -5) echo "You have 128-bit little-endian double-double long doubles." >& 4 ;; -6) echo "You have 128-bit big-endian double-double long doubles." >& 4 ;; +5) echo "You have 128-bit fully little-endian double-double long doubles (64-bit LEs in LE)." >& 4 ;; +6) echo "You have 128-bit fully big-endian double-double long doubles (64-bit BEs in BE)." >& 4 ;; +7) echo "You have 128-bit mixed double-double long doubles (64-bit LEs in BE)." >& 4 ;; +8) echo "You have 128-bit mixed double-double long doubles (64-bit BEs in LE)." >& 4 ;; *) echo "Cannot figure out your long double." >&4 ;; esac $rm_try @@ -20518,14 +20547,22 @@ else ;; esac ;; - 5) # 128-bit LE "double double" + 5) # 128-bit LE-LE "double double" longdblinfbytes='0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x7f' longdblnanbytes='0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f' ;; - 6) # 128-bit BE "double double" + 6) # 128-bit BE-BE "double double" longdblinfbytes='0x7f, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00' longdblnanbytes='0x7f, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00' ;; + 7) # 128-bit LE-BE "double double" + longdblinfbytes='0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00' + longdblnanbytes='0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00' + ;; + 8) # 128-bit BE-LE "double double" + longdblinfbytes='0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00' + longdblnanbytes='0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00' + ;; *) # No idea. longdblinfbytes=$undef longdblnanbytes=$undef @@ -20584,7 +20621,7 @@ $cat >try.c < Perl modules having binary parts (meaning that a C compiler is used) will have to be recompiled to be -used with 5.23.5. If you find you do need to rebuild an extension with -5.23.5, you may safely do so without disturbing the older +used with 5.23.6. If you find you do need to rebuild an extension with +5.23.6, you may safely do so without disturbing the older installations. (See L<"Coexistence with earlier versions of perl 5"> above.) @@ -2569,15 +2569,15 @@ Firstly, the bare minimum to run this script print("$f\n"); } -in Linux with perl-5.23.5 is as follows (under $Config{prefix}): +in Linux with perl-5.23.6 is as follows (under $Config{prefix}): ./bin/perl - ./lib/perl5/5.23.5/strict.pm - ./lib/perl5/5.23.5/warnings.pm - ./lib/perl5/5.23.5/i686-linux/File/Glob.pm - ./lib/perl5/5.23.5/feature.pm - ./lib/perl5/5.23.5/XSLoader.pm - ./lib/perl5/5.23.5/i686-linux/auto/File/Glob/Glob.so + ./lib/perl5/5.23.6/strict.pm + ./lib/perl5/5.23.6/warnings.pm + ./lib/perl5/5.23.6/i686-linux/File/Glob.pm + ./lib/perl5/5.23.6/feature.pm + ./lib/perl5/5.23.6/XSLoader.pm + ./lib/perl5/5.23.6/i686-linux/auto/File/Glob/Glob.so Secondly, for perl-5.10.1, the Debian perl-base package contains 591 files, (of which 510 are for lib/unicore) totaling about 3.5MB in its diff --git a/MANIFEST b/MANIFEST index 5531535..2adf881 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1032,11 +1032,6 @@ cpan/Encode/ucm/viscii.ucm Unicode Character Map cpan/Encode/Unicode/Makefile.PL Encode extension cpan/Encode/Unicode/Unicode.pm Encode extension cpan/Encode/Unicode/Unicode.xs Encode extension -cpan/encoding-warnings/lib/encoding/warnings.pm warn on implicit encoding conversions -cpan/encoding-warnings/t/1-warning.t tests for encoding::warnings -cpan/encoding-warnings/t/2-fatal.t tests for encoding::warnings -cpan/encoding-warnings/t/3-normal.t tests for encoding::warnings -cpan/encoding-warnings/t/4-lexical.t tests for encoding::warnings cpan/experimental/lib/experimental.pm cpan/experimental/t/basic.t cpan/ExtUtils-Constant/lib/ExtUtils/Constant/Base.pm generate XS code to import C header constants @@ -3081,11 +3076,17 @@ dist/Data-Dumper/t/sortkeys.t See if Data::Dumper::Sortkeys works dist/Data-Dumper/t/sparseseen.t See if Data::Dumper::Sparseseen works dist/Data-Dumper/t/terse.t See if Data::Dumper terse option works dist/Data-Dumper/t/toaster.t See if Data::Dumper::Toaster works +dist/Data-Dumper/t/trailing_comma.t See if Data::Dumper::Trailingcomma works dist/Data-Dumper/t/values.t See if Data::Dumper::Values works dist/Devel-SelfStubber/lib/Devel/SelfStubber.pm Generate stubs for SelfLoader.pm dist/Devel-SelfStubber/t/Devel-SelfStubber.t See if Devel::SelfStubber works dist/Dumpvalue/lib/Dumpvalue.pm Screen dump of perl values dist/Dumpvalue/t/Dumpvalue.t See if Dumpvalue works +dist/encoding-warnings/lib/encoding/warnings.pm warn on implicit encoding conversions +dist/encoding-warnings/t/1-warning.t tests for encoding::warnings +dist/encoding-warnings/t/2-fatal.t tests for encoding::warnings +dist/encoding-warnings/t/3-normal.t tests for encoding::warnings +dist/encoding-warnings/t/4-lexical.t tests for encoding::warnings dist/Env/lib/Env.pm Map environment into ordinary variables dist/Env/t/array.t See if Env works for arrays dist/Env/t/env.t See if Env works @@ -4623,11 +4624,13 @@ pod/perl5201delta.pod Perl changes in version 5.20.1 pod/perl5202delta.pod Perl changes in version 5.20.2 pod/perl5203delta.pod Perl changes in version 5.20.3 pod/perl5220delta.pod Perl changes in version 5.22.0 +pod/perl5221delta.pod Perl changes in version 5.22.1 pod/perl5230delta.pod Perl changes in version 5.23.0 pod/perl5231delta.pod Perl changes in version 5.23.1 pod/perl5232delta.pod Perl changes in version 5.23.2 pod/perl5233delta.pod Perl changes in version 5.23.3 pod/perl5234delta.pod Perl changes in version 5.23.4 +pod/perl5235delta.pod Perl changes in version 5.23.5 pod/perl561delta.pod Perl changes in version 5.6.1 pod/perl56delta.pod Perl changes in version 5.6 pod/perl581delta.pod Perl changes in version 5.8.1 @@ -5046,6 +5049,7 @@ t/lib/croak/op Test croak calls from op.c t/lib/croak/pp Test croak calls from pp.c t/lib/croak/pp_ctl Test croak calls from pp_ctl.c t/lib/croak/pp_hot Test croak calls from pp_hot.c +t/lib/croak/pp_sys Test croak calls from pp_sys.c t/lib/croak.t Test calls to Perl_croak() in the C source. t/lib/croak/toke Test croak calls from toke.c t/lib/cygwin.t Builtin cygwin function tests @@ -5548,6 +5552,7 @@ t/uni/fold.t See if Unicode folding works t/uni/goto.t See if Unicode goto &sub works t/uni/greek.t See if Unicode in greek works t/uni/gv.t See if Unicode GVs work. +t/uni/heavy.t See if utf8_heavy.pl uses perl that depends on it t/uni/labels.t See if Unicode labels work t/uni/latin2.t See if Unicode in latin2 works t/uni/lex_utf8.t See if Unicode in lexer works diff --git a/META.json b/META.json index c63b142..74227f0 100644 --- a/META.json +++ b/META.json @@ -23,6 +23,7 @@ "dist/Data-Dumper", "dist/Devel-SelfStubber", "dist/Dumpvalue", + "dist/encoding-warnings", "dist/Env", "dist/Exporter", "dist/ExtUtils-CBuilder", @@ -122,6 +123,6 @@ "url" : "http://perl5.git.perl.org/" } }, - "version" : "5.023005", + "version" : "5.023006", "x_serialization_backend" : "JSON::PP version 2.27300" } diff --git a/META.yml b/META.yml index 5f05dc0..f5791aa 100644 --- a/META.yml +++ b/META.yml @@ -21,6 +21,7 @@ no_index: - dist/Data-Dumper - dist/Devel-SelfStubber - dist/Dumpvalue + - dist/encoding-warnings - dist/Env - dist/Exporter - dist/ExtUtils-CBuilder @@ -109,5 +110,5 @@ resources: homepage: http://www.perl.org/ license: http://dev.perl.org/licenses/ repository: http://perl5.git.perl.org/ -version: '5.023005' -x_serialization_backend: 'CPAN::Meta::YAML version 0.017' +version: '5.023006' +x_serialization_backend: 'CPAN::Meta::YAML version 0.018' diff --git a/Makefile.SH b/Makefile.SH index 3e3892c..caba036 100755 --- a/Makefile.SH +++ b/Makefile.SH @@ -504,7 +504,7 @@ mini_obj = $(minindt_obj) $(MINIDTRACE_O) ndt_obj = $(obj0) $(obj1) $(obj2) $(obj3) $(ARCHOBJS) obj = $(ndt_obj) $(DTRACE_O) -perltoc_pod_prereqs = extra.pods pod/perl5235delta.pod pod/perlapi.pod pod/perlintern.pod pod/perlmodlib.pod pod/perluniprops.pod +perltoc_pod_prereqs = extra.pods pod/perl5236delta.pod pod/perlapi.pod pod/perlintern.pod pod/perlmodlib.pod pod/perluniprops.pod generated_pods = pod/perltoc.pod $(perltoc_pod_prereqs) generated_headers = uudmap.h bitcount.h mg_data.h @@ -1064,9 +1064,9 @@ pod/perlintern.pod: $(MINIPERL_EXE) autodoc.pl embed.fnc pod/perlmodlib.pod: $(MINIPERL_EXE) pod/perlmodlib.PL MANIFEST $(MINIPERL) pod/perlmodlib.PL -q -pod/perl5235delta.pod: pod/perldelta.pod - $(RMS) pod/perl5235delta.pod - $(LNS) perldelta.pod pod/perl5235delta.pod +pod/perl5236delta.pod: pod/perldelta.pod + $(RMS) pod/perl5236delta.pod + $(LNS) perldelta.pod pod/perl5236delta.pod extra.pods: $(MINIPERL_EXE) -@test ! -f extra.pods || rm -f `cat extra.pods` diff --git a/NetWare/Makefile b/NetWare/Makefile index 68057e3..080d5db 100644 --- a/NetWare/Makefile +++ b/NetWare/Makefile @@ -86,7 +86,7 @@ NLM_VERSION = 3,20,0 # Here comes the CW tools - TO BE FILLED TO BUILD WITH CW - -MODULE_DESC = "Perl 5.23.5 for NetWare" +MODULE_DESC = "Perl 5.23.6 for NetWare" CCTYPE = CodeWarrior C_COMPILER = mwccnlm -c CPP_COMPILER = mwccnlm @@ -462,7 +462,7 @@ INST_NW_TOP2 = $(INST_NW_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -INST_VER = \5.23.5 +INST_VER = \5.23.6 # # Comment this out if you DON'T want your perl installation to have diff --git a/NetWare/config_H.wc b/NetWare/config_H.wc index d98d830..a505221 100644 --- a/NetWare/config_H.wc +++ b/NetWare/config_H.wc @@ -1042,7 +1042,7 @@ * This symbol contains the ~name expanded version of ARCHLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define ARCHLIB "c:\\perl\\5.23.5\\lib\\NetWare-x86-multi-thread" /**/ +#define ARCHLIB "c:\\perl\\5.23.6\\lib\\NetWare-x86-multi-thread" /**/ /*#define ARCHLIB_EXP "" /**/ /* ARCHNAME: @@ -1073,8 +1073,8 @@ * This symbol is the filename expanded version of the BIN symbol, for * programs that do not want to deal with that at run-time. */ -#define BIN "c:\\perl\\5.23.5\\bin\\NetWare-x86-multi-thread" /**/ -#define BIN_EXP "c:\\perl\\5.23.5\\bin\\NetWare-x86-multi-thread" /**/ +#define BIN "c:\\perl\\5.23.6\\bin\\NetWare-x86-multi-thread" /**/ +#define BIN_EXP "c:\\perl\\5.23.6\\bin\\NetWare-x86-multi-thread" /**/ /* BYTEORDER: * This symbol holds the hexadecimal constant defined in byteorder, @@ -3047,7 +3047,7 @@ * This symbol contains the ~name expanded version of SITEARCH, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define SITEARCH "c:\\perl\\site\\5.23.5\\lib\\NetWare-x86-multi-thread" /**/ +#define SITEARCH "c:\\perl\\site\\5.23.6\\lib\\NetWare-x86-multi-thread" /**/ /*#define SITEARCH_EXP "" /**/ /* SITELIB: @@ -3070,7 +3070,7 @@ * removed. The elements in inc_version_list (inc_version_list.U) can * be tacked onto this variable to generate a list of directories to search. */ -#define SITELIB "c:\\perl\\site\\5.23.5\\lib" /**/ +#define SITELIB "c:\\perl\\site\\5.23.6\\lib" /**/ /*#define SITELIB_EXP "" /**/ #define SITELIB_STEM "" /**/ diff --git a/Porting/Glossary b/Porting/Glossary index 68ee298..bc879f7 100644 --- a/Porting/Glossary +++ b/Porting/Glossary @@ -2809,14 +2809,14 @@ doubleinfbytes (infnan.U): doublekind (longdblfio.U): This variable, if defined, encodes the type of a double: - 1 = IEEE 754 32-bit big little endian, - 2 = IEEE 754 32-bit big big endian, - 3 = IEEE 754 64-bit big little endian, - 4 = IEEE 754 64-bit big big endian, - 5 = IEEE 754 128-bit big little endian, - 6 = IEEE 754 128-bit big big endian, - 7 = IEEE 754 64-bit big mixed endian le-be, - 8 = IEEE 754 64-bit big mixed endian be-le, + 1 = IEEE 754 32-bit little endian, + 2 = IEEE 754 32-bit big endian, + 3 = IEEE 754 64-bit little endian, + 4 = IEEE 754 64-bit big endian, + 5 = IEEE 754 128-bit little endian, + 6 = IEEE 754 128-bit big endian, + 7 = IEEE 754 64-bit mixed endian le-be, + 8 = IEEE 754 64-bit mixed endian be-le, -1 = unknown format. doublemantbits (mantbits.U): @@ -4060,8 +4060,8 @@ longdblinfbytes (infnan.U): longdblkind (d_longdbl.U): This variable, if defined, encodes the type of a long double: - 0 = double, 1 = IEEE 754 128-bit big little endian, - 2 = IEEE 754 128-bit big big endian, 3 = x86 80-bit little endian, + 0 = double, 1 = IEEE 754 128-bit little endian, + 2 = IEEE 754 128-bit big endian, 3 = x86 80-bit little endian, 4 = x86 80-bit big endian, 5 = double-double 128-bit little endian, 6 = double-double 128-bit big endian, -1 = unknown format. diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index fa1ea5b..bb6af71 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -179,7 +179,7 @@ use File::Glob qw(:case); }, 'base' => { - 'DISTRIBUTION' => 'RGARCIA/base-2.18.tar.gz', + 'DISTRIBUTION' => 'RJBS/base-2.23.tar.gz', 'FILES' => q[dist/base], }, @@ -226,7 +226,7 @@ use File::Glob qw(:case); }, 'Config::Perl::V' => { - 'DISTRIBUTION' => 'HMBRAND/Config-Perl-V-0.24.tgz', + 'DISTRIBUTION' => 'HMBRAND/Config-Perl-V-0.25.tgz', 'FILES' => q[cpan/Config-Perl-V], 'EXCLUDED' => [qw( examples/show-v.pl @@ -303,7 +303,7 @@ use File::Glob qw(:case); }, 'CPAN::Meta::Requirements' => { - 'DISTRIBUTION' => 'DAGOLDEN/CPAN-Meta-Requirements-2.133.tar.gz', + 'DISTRIBUTION' => 'DAGOLDEN/CPAN-Meta-Requirements-2.140.tar.gz', 'FILES' => q[cpan/CPAN-Meta-Requirements], 'EXCLUDED' => [ qw(t/00-report-prereqs.t), @@ -314,7 +314,7 @@ use File::Glob qw(:case); }, 'CPAN::Meta::YAML' => { - 'DISTRIBUTION' => 'DAGOLDEN/CPAN-Meta-YAML-0.017-TRIAL.tar.gz', + 'DISTRIBUTION' => 'DAGOLDEN/CPAN-Meta-YAML-0.018.tar.gz', 'FILES' => q[cpan/CPAN-Meta-YAML], 'EXCLUDED' => [ 't/00-report-prereqs.t', @@ -398,7 +398,7 @@ use File::Glob qw(:case); 'encoding::warnings' => { 'DISTRIBUTION' => 'AUDREYT/encoding-warnings-0.11.tar.gz', - 'FILES' => q[cpan/encoding-warnings], + 'FILES' => q[dist/encoding-warnings], 'EXCLUDED' => [ qr{^inc/Module/}, qw(t/0-signature.t), @@ -438,10 +438,7 @@ use File::Glob qw(:case); 'ExtUtils::Constant' => { - # Nick has confirmed that while we have diverged from CPAN, - # this package isn't primarily maintained in core - # Another release will happen "Sometime" - 'DISTRIBUTION' => '', #'NWCLARK/ExtUtils-Constant-0.16.tar.gz', + 'DISTRIBUTION' => 'NWCLARK/ExtUtils-Constant-0.23.tar.gz', 'FILES' => q[cpan/ExtUtils-Constant], 'EXCLUDED' => [ qw( lib/ExtUtils/Constant/Aaargh56Hash.pm @@ -449,6 +446,8 @@ use File::Glob qw(:case); examples/perl_regcomp_posix_keyword.pl ), ], + # cc37ebcee3 to fix VMS failure + 'CUSTOMIZED' => [ qw(t/Constant.t) ], }, 'ExtUtils::Install' => { @@ -690,7 +689,7 @@ use File::Glob qw(:case); }, 'Locale-Codes' => { - 'DISTRIBUTION' => 'SBECK/Locale-Codes-3.36.tar.gz', + 'DISTRIBUTION' => 'SBECK/Locale-Codes-3.37.tar.gz', 'FILES' => q[cpan/Locale-Codes], 'EXCLUDED' => [ qw( README.first @@ -736,7 +735,7 @@ use File::Glob qw(:case); }, 'Math::BigInt::FastCalc' => { - 'DISTRIBUTION' => 'PJACKLAM/Math-BigInt-FastCalc-0.37.tar.gz', + 'DISTRIBUTION' => 'PJACKLAM/Math-BigInt-FastCalc-0.38.tar.gz', 'FILES' => q[cpan/Math-BigInt-FastCalc], 'EXCLUDED' => [ qr{^inc/}, @@ -798,7 +797,7 @@ use File::Glob qw(:case); }, 'Module::CoreList' => { - 'DISTRIBUTION' => 'BINGOS/Module-CoreList-5.20151020.tar.gz', + 'DISTRIBUTION' => 'BINGOS/Module-CoreList-5.20151120.tar.gz', 'FILES' => q[dist/Module-CoreList], }, @@ -818,7 +817,7 @@ use File::Glob qw(:case); }, 'Module::Metadata' => { - 'DISTRIBUTION' => 'ETHER/Module-Metadata-1.000030-TRIAL.tar.gz', + 'DISTRIBUTION' => 'ETHER/Module-Metadata-1.000031-TRIAL.tar.gz', 'FILES' => q[cpan/Module-Metadata], 'EXCLUDED' => [ qw(t/00-report-prereqs.t), @@ -860,7 +859,7 @@ use File::Glob qw(:case); }, 'PathTools' => { - 'DISTRIBUTION' => 'RJBS/PathTools-3.59.tar.gz', + 'DISTRIBUTION' => 'RJBS/PathTools-3.60.tar.gz', 'FILES' => q[dist/PathTools], 'EXCLUDED' => [ qr{^t/lib/Test/}, @@ -919,6 +918,7 @@ use File::Glob qw(:case); 'EXCLUDED' => ['perldoc'], # https://rt.cpan.org/Ticket/Display.html?id=106798 + # https://rt.cpan.org/Ticket/Display.html?id=110368 'CUSTOMIZED' => [ qw[ lib/Pod/Perldoc.pm ] ], }, @@ -1021,7 +1021,7 @@ use File::Glob qw(:case); }, 'Term::ANSIColor' => { - 'DISTRIBUTION' => 'RRA/Term-ANSIColor-4.03.tar.gz', + 'DISTRIBUTION' => 'RRA/Term-ANSIColor-4.04.tar.gz', 'FILES' => q[cpan/Term-ANSIColor], 'EXCLUDED' => [ qr{^examples/}, @@ -1109,13 +1109,6 @@ use File::Glob qw(:case); 'Text::ParseWords' => { 'DISTRIBUTION' => 'CHORNY/Text-ParseWords-3.30.tar.gz', 'FILES' => q[cpan/Text-ParseWords], - - # Waiting to be merged upstream: - # see https://github.com/chorny/Text-ParseWords/pull/6 - 'CUSTOMIZED' => [ - qw( t/ParseWords.t - ), - ], }, 'Text-Tabs+Wrap' => { @@ -1227,7 +1220,7 @@ use File::Glob qw(:case); }, 'Unicode::Normalize' => { - 'DISTRIBUTION' => 'KHW/Unicode-Normalize-1.23.tar.gz', + 'DISTRIBUTION' => 'KHW/Unicode-Normalize-1.24.tar.gz', 'FILES' => q[cpan/Unicode-Normalize], }, diff --git a/Porting/checkAUTHORS.pl b/Porting/checkAUTHORS.pl index fe448dd..2520afb 100755 --- a/Porting/checkAUTHORS.pl +++ b/Porting/checkAUTHORS.pl @@ -588,6 +588,7 @@ crt\100kiski.net perl\100ctweten.amsite.com dairiki\100dairiki.org dairiki at dairiki.org dagolden\100cpan.org xdaveg\100gmail.com ++ xdg\100xdg.me damian\100conway.org damian\100cs.monash.edu.au dan\100sidhe.org sugalsd\100lbcc.cc.or.us + sugalskd\100osshe.edu diff --git a/Porting/config.sh b/Porting/config.sh index b7e75bd..eefd8a7 100644 --- a/Porting/config.sh +++ b/Porting/config.sh @@ -39,12 +39,12 @@ alignbytes='4' ansi2knr='' aphostname='/bin/hostname' api_revision='5' -api_subversion='5' +api_subversion='6' api_version='23' -api_versionstring='5.23.5' +api_versionstring='5.23.6' ar='ar' -archlib='/pro/lib/perl5/5.23.5/i686-linux-64int' -archlibexp='/pro/lib/perl5/5.23.5/i686-linux-64int' +archlib='/pro/lib/perl5/5.23.6/i686-linux-64int' +archlibexp='/pro/lib/perl5/5.23.6/i686-linux-64int' archname64='64int' archname='i686-linux-64int' archobjs='' @@ -813,7 +813,7 @@ incpath='' incpth='/usr/lib/gcc/i586-suse-linux/4.8/include /usr/local/include /usr/lib/gcc/i586-suse-linux/4.8/include-fixed /usr/lib/gcc/i586-suse-linux/4.8/../../../../i586-suse-linux/include /usr/include' inews='' initialinstalllocation='/pro/bin' -installarchlib='/pro/lib/perl5/5.23.5/i686-linux-64int' +installarchlib='/pro/lib/perl5/5.23.6/i686-linux-64int' installbin='/pro/bin' installhtml1dir='' installhtml3dir='' @@ -821,13 +821,13 @@ installman1dir='/pro/local/man/man1' installman3dir='/pro/local/man/man3' installprefix='/pro' installprefixexp='/pro' -installprivlib='/pro/lib/perl5/5.23.5' +installprivlib='/pro/lib/perl5/5.23.6' installscript='/pro/bin' -installsitearch='/pro/lib/perl5/site_perl/5.23.5/i686-linux-64int' +installsitearch='/pro/lib/perl5/site_perl/5.23.6/i686-linux-64int' installsitebin='/pro/bin' installsitehtml1dir='' installsitehtml3dir='' -installsitelib='/pro/lib/perl5/site_perl/5.23.5' +installsitelib='/pro/lib/perl5/site_perl/5.23.6' installsiteman1dir='/pro/local/man/man1' installsiteman3dir='/pro/local/man/man3' installsitescript='/pro/bin' @@ -953,7 +953,7 @@ perl_patchlevel='' perl_static_inline='static __inline__' perladmin='hmbrand@cpan.org' perllibs='-lnsl -ldl -lm -lcrypt -lutil -lc' -perlpath='/pro/bin/perl5.23.5' +perlpath='/pro/bin/perl5.23.6' pg='pg' phostname='hostname' pidtype='pid_t' @@ -962,8 +962,8 @@ pmake='' pr='' prefix='/pro' prefixexp='/pro' -privlib='/pro/lib/perl5/5.23.5' -privlibexp='/pro/lib/perl5/5.23.5' +privlib='/pro/lib/perl5/5.23.6' +privlibexp='/pro/lib/perl5/5.23.6' procselfexe='"/proc/self/exe"' prototype='define' ptrsize='4' @@ -1029,17 +1029,17 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 6, 17, 29, 31, 0' sig_size='69' signal_t='void' -sitearch='/pro/lib/perl5/site_perl/5.23.5/i686-linux-64int' -sitearchexp='/pro/lib/perl5/site_perl/5.23.5/i686-linux-64int' +sitearch='/pro/lib/perl5/site_perl/5.23.6/i686-linux-64int' +sitearchexp='/pro/lib/perl5/site_perl/5.23.6/i686-linux-64int' sitebin='/pro/bin' sitebinexp='/pro/bin' sitehtml1dir='' sitehtml1direxp='' sitehtml3dir='' sitehtml3direxp='' -sitelib='/pro/lib/perl5/site_perl/5.23.5' +sitelib='/pro/lib/perl5/site_perl/5.23.6' sitelib_stem='/pro/lib/perl5/site_perl' -sitelibexp='/pro/lib/perl5/site_perl/5.23.5' +sitelibexp='/pro/lib/perl5/site_perl/5.23.6' siteman1dir='/pro/local/man/man1' siteman1direxp='/pro/local/man/man1' siteman3dir='/pro/local/man/man3' @@ -1065,7 +1065,7 @@ src='.' ssizetype='ssize_t' st_ino_sign='1' st_ino_size='8' -startperl='#!/pro/bin/perl5.23.5' +startperl='#!/pro/bin/perl5.23.6' startsh='#!/bin/sh' static_ext=' ' stdchar='char' @@ -1078,7 +1078,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/usr/include/string.h' submit='' -subversion='5' +subversion='6' sysman='/usr/share/man/man1' sysroot='' tail='' @@ -1177,8 +1177,8 @@ vendorprefix='' vendorprefixexp='' vendorscript='' vendorscriptexp='' -version='5.23.5' -version_patchlevel_string='version 23 subversion 5' +version='5.23.6' +version_patchlevel_string='version 23 subversion 6' versiononly='define' vi='' xlibpth='/usr/lib/386 /lib/386' @@ -1188,10 +1188,10 @@ zcat='' zip='zip' PERL_REVISION=5 PERL_VERSION=23 -PERL_SUBVERSION=5 +PERL_SUBVERSION=6 PERL_API_REVISION=5 PERL_API_VERSION=23 -PERL_API_SUBVERSION=5 +PERL_API_SUBVERSION=6 PERL_PATCHLEVEL='' PERL_CONFIG_SH=true : Variables propagated from previous config.sh file. diff --git a/Porting/config_H b/Porting/config_H index 08ff181..674ce0d 100644 --- a/Porting/config_H +++ b/Porting/config_H @@ -960,8 +960,8 @@ * This symbol contains the ~name expanded version of ARCHLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define ARCHLIB "/pro/lib/perl5/5.23.5/i686-linux-64int-ld" /**/ -#define ARCHLIB_EXP "/pro/lib/perl5/5.23.5/i686-linux-64int-ld" /**/ +#define ARCHLIB "/pro/lib/perl5/5.23.6/i686-linux-64int-ld" /**/ +#define ARCHLIB_EXP "/pro/lib/perl5/5.23.6/i686-linux-64int-ld" /**/ /* ARCHNAME: * This symbol holds a string representing the architecture name. @@ -2068,8 +2068,8 @@ * This symbol contains the ~name expanded version of PRIVLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define PRIVLIB "/pro/lib/perl5/5.23.5" /**/ -#define PRIVLIB_EXP "/pro/lib/perl5/5.23.5" /**/ +#define PRIVLIB "/pro/lib/perl5/5.23.6" /**/ +#define PRIVLIB_EXP "/pro/lib/perl5/5.23.6" /**/ /* PTRSIZE: * This symbol contains the size of a pointer, so that the C preprocessor @@ -2119,8 +2119,8 @@ * This symbol contains the ~name expanded version of SITEARCH, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define SITEARCH "/pro/lib/perl5/site_perl/5.23.5/i686-linux-64int-ld" /**/ -#define SITEARCH_EXP "/pro/lib/perl5/site_perl/5.23.5/i686-linux-64int-ld" /**/ +#define SITEARCH "/pro/lib/perl5/site_perl/5.23.6/i686-linux-64int-ld" /**/ +#define SITEARCH_EXP "/pro/lib/perl5/site_perl/5.23.6/i686-linux-64int-ld" /**/ /* SITELIB: * This symbol contains the name of the private library for this package. @@ -2142,8 +2142,8 @@ * removed. The elements in inc_version_list (inc_version_list.U) can * be tacked onto this variable to generate a list of directories to search. */ -#define SITELIB "/pro/lib/perl5/site_perl/5.23.5" /**/ -#define SITELIB_EXP "/pro/lib/perl5/site_perl/5.23.5" /**/ +#define SITELIB "/pro/lib/perl5/site_perl/5.23.6" /**/ +#define SITELIB_EXP "/pro/lib/perl5/site_perl/5.23.6" /**/ #define SITELIB_STEM "/pro/lib/perl5/site_perl" /**/ /* SSize_t: @@ -4282,7 +4282,7 @@ * script to make sure (one hopes) that it runs with perl and not * some shell. */ -#define STARTPERL "#!/pro/bin/perl5.23.5" /**/ +#define STARTPERL "#!/pro/bin/perl5.23.6" /**/ /* HAS_STDIO_STREAM_ARRAY: * This symbol, if defined, tells that there is an array diff --git a/Porting/deparse-skips.txt b/Porting/deparse-skips.txt index 9b3432d..878b512 100644 --- a/Porting/deparse-skips.txt +++ b/Porting/deparse-skips.txt @@ -261,7 +261,7 @@ __DEPARSE_FAILURES__ ../cpan/autodie/t/utf8_open.t ../cpan/autodie/t/utime.t ../cpan/autodie/t/version_tag.t -../cpan/encoding-warnings/t/4-lexical.t +../dist/encoding-warnings/t/4-lexical.t ../cpan/podlators/t/basic.t ../cpan/version/t/09_list_util.t ../dist/Attribute-Handlers/t/constants.t diff --git a/Porting/epigraphs.pod b/Porting/epigraphs.pod index c96c57a..e1fb55e 100644 --- a/Porting/epigraphs.pod +++ b/Porting/epigraphs.pod @@ -17,6 +17,55 @@ Consult your favorite dictionary for details. =head1 EPIGRAPHS +=head2 v5.23.5 - utastro!nather (Ed Nather), "The Story of Mel", in net.jokes, May 21, 1983. + +L + +After Mel had left the company for greener pa$ture$, the Big Boss asked +me to look at the code and see if I could find the test and reverse it. +Somewhat reluctantly, I agreed to look. Tracking Mel's code was a real +adventure. + +I have often felt that programming is an art form, whose real value can +only be appreciated by another versed in the same arcane art; there are +lovely gems and brilliant coups hidden from human view and admiration, +sometimes forever, by the very nature of the process. You can learn a +lot about an individual just by reading through his code, even in +hexadecimal. Mel was, I think, an unsung genius. + +Perhaps my greatest shock came when I found an innocent loop that had +no test in it. No test. None. Common sense said it had to be a closed +loop, where the program would circle, forever, endlessly. Program +control passed right through it, however, and safely out the other side. +It took me two weeks to figure it out. + +The RPC-4000 computer had a really modern facility called an index +register. It allowed the programmer to write a program loop that used +an indexed instruction inside; each time through, the number in the +index register was added to the address of that instruction, so it +would refer to the next datum in a series. He had only to increment +the index register each time through. Mel never used it. + +Instead, he would pull the instruction into a machine register, add one +to its address, and store it back. He would then execute the modified +instruction right from the register. The loop was written so this +additional execution time was taken into account -- just as this +instruction finished, the next one was right under the drum's read head, +ready to go. But the loop had no test in it. + +The vital clue came when I noticed the index register bit, the bit that +lay between the address and the operation code in the instruction word, +was turned on -- yet Mel never used the index register, leaving it zero +all the time. When the light went on it nearly blinded me. + +He had located the data he was working on near the top of memory -- the +largest locations the instructions could address -- so, after the last +datum was handled, incrementing the instruction address would make it +overflow. The carry would add one to the operation code, changing it to +the next one in the instruction set: a jump instruction. Sure enough, +the next program instruction was in address location zero, and the +program went happily on its way. + =head2 v5.23.4 - Denis Diderot, trans. David Coward, "Jacques the Fatalist" L @@ -154,6 +203,68 @@ L + + If the snow flies in my face, + Let me shake it off me! + If my heart within me speaks, + I'll sing bright and gaily! + + Will not listen what it says, + Have no ears for moaning. + Do not feel what it complains,-- + Only fools like groaning! + + Jolly brave into the world, + 'Gainst all wind and weather,-- + If there is no God on earth, + Let 's be gods down nether! + +=head2 v5.22.1-RC4 - Wilhelm Müller, trans. Anon., "The Signpost" (No. 20 in Schubert's song-cycle, "Winterreise") + +L + + Why do I shun all those highways + Which the other wanderer seeks? + Why do I find bridged by-ways + Through snow-covered deep creeks? + + For I have no crime committed, + Why I should now run from men,-- + What demented heart's desire + Drives me to a desert glen? + + Signposts on all highways stationed + Point their signs toward the towns, + Whilst I wonder 'yond moderation, + Without rest, yet seeking rest! + + One such signpost I see planted + Of my question unconcerned, + One road must my choice be granted, + Whence no man has yet returned! + +=head2 v5.22.1-RC3 - Wilhelm Müller, trans. Anon., "Stormy Morning" (No. 18 in Schubert's song-cycle, "Winterreise") + +L + + How the storm tore rents + In heavens gray attired! + The rags of cloud are flying + Around, of combat tired. + + And flames of fire lambent, + Fly between them and part, + That 's what I call a morning, + A morning after my heart! + + My heart sees in the heavens + Its own picture unspoilt-- + It's nothing but the Winter, + The Winter, cold and wild. + =head2 v5.22.1-RC2 - Wilhelm Müller, trans. Anon., "The Old Head" (No. 14 in Schubert's song-cycle, "Winterreise") L diff --git a/Porting/perldelta_template.pod b/Porting/perldelta_template.pod index 518e850..0406b74 100644 --- a/Porting/perldelta_template.pod +++ b/Porting/perldelta_template.pod @@ -378,7 +378,7 @@ here. XXX Generate this with: - perl Porting/acknowledgements.pl v5.23.5..HEAD + perl Porting/acknowledgements.pl v5.23.6..HEAD =head1 Reporting Bugs diff --git a/Porting/release_schedule.pod b/Porting/release_schedule.pod index 303b93f..42f98c5 100644 --- a/Porting/release_schedule.pod +++ b/Porting/release_schedule.pod @@ -23,7 +23,7 @@ Code freezes (which happen in the 5.23.X series) =head2 Perl 5.22 2015-06-01 5.22.0 ✓ Ricardo Signes - 2015-10-?? 5.22.1 Steve Hay + 2015-12-13 5.22.1 ✓ Steve Hay 2016-??-?? 5.22.2 Steve Hay 2016-??-?? 5.22.3 ?? @@ -56,7 +56,7 @@ you should reset the version numbers to the next blead series. 2015-08-20 5.23.2 ✓ Matthew Horsfall 2015-09-20 5.23.3 ✓ Peter Martini 2015-10-20 5.23.4 ✓ Steve Hay - 2015-11-20 5.23.5 Abigail + 2015-11-20 5.23.5 ✓ Abigail 2015-12-20 5.23.6 David Golden (RC0 for 5.24.0 will be released once we think that all the blockers have been @@ -71,30 +71,30 @@ and can't find a substitute amongst this list, mail p5p. (Please do not add any names to this list without prior consent of the Pumpking.) -Abigail -Aristotle Pagaltzis -Ask Bjørn Hansen -Chris Williams -Dave Cross -Dave Rolsky -David Golden -Florian Ragwitz -Jesse Luehrs -Jesse Vincent -Leon Brocard -Matt Trout -Matthew Horsfall -Max Maischein -Peter Martini -Philippe Bruhat -Ricardo Signes -Stevan Little -Steve Hay -Tatsuhiko Miyagawa -Tony Cook -Yves Orton -Zefram -Ævar Arnfjörð Bjarmason + Abigail + Aristotle Pagaltzis + Ask Bjørn Hansen + Chris Williams + Dave Cross + Dave Rolsky + David Golden + Florian Ragwitz + Jesse Luehrs + Jesse Vincent + Leon Brocard + Matt Trout + Matthew Horsfall + Max Maischein + Peter Martini + Philippe Bruhat + Ricardo Signes + Stevan Little + Steve Hay + Tatsuhiko Miyagawa + Tony Cook + Yves Orton + Zefram + Ævar Arnfjörð Bjarmason =head1 AUTHOR diff --git a/Porting/todo.pod b/Porting/todo.pod index ae5de30..9d4a55f 100644 --- a/Porting/todo.pod +++ b/Porting/todo.pod @@ -467,7 +467,7 @@ Natively 64-bit systems need neither -Duse64bitint nor -Duse64bitall. On these systems, it might be the default compilation mode, and there is currently no guarantee that passing no use64bitall option to the Configure process will build a 32bit perl. Implementing -Duse32bit* -options would be nice for perl 5.23.5. +options would be nice for perl 5.23.6. =head2 Profile Perl - am I hot or not? @@ -1169,7 +1169,7 @@ L =head1 Big projects Tasks that will get your name mentioned in the description of the "Highlights -of 5.23.5" +of 5.23.6" =head2 make ithreads more robust diff --git a/README.haiku b/README.haiku index c51aac6..39662be 100644 --- a/README.haiku +++ b/README.haiku @@ -22,9 +22,9 @@ The build procedure is completely standard: Make perl executable and create a symlink for libperl: chmod a+x /boot/common/bin/perl - cd /boot/common/lib; ln -s perl5/5.23.5/BePC-haiku/CORE/libperl.so . + cd /boot/common/lib; ln -s perl5/5.23.6/BePC-haiku/CORE/libperl.so . -Replace C<5.23.5> with your respective version of Perl. +Replace C<5.23.6> with your respective version of Perl. =head1 KNOWN PROBLEMS diff --git a/README.macosx b/README.macosx index ee168de..5cd9b9d 100644 --- a/README.macosx +++ b/README.macosx @@ -10,9 +10,9 @@ perlmacosx - Perl under Mac OS X This document briefly describes Perl under Mac OS X. - curl -O http://www.cpan.org/src/perl-5.23.5.tar.gz - tar -xzf perl-5.23.5.tar.gz - cd perl-5.23.5 + curl -O http://www.cpan.org/src/perl-5.23.6.tar.gz + tar -xzf perl-5.23.6.tar.gz + cd perl-5.23.6 ./Configure -des -Dprefix=/usr/local/ make make test @@ -20,7 +20,7 @@ This document briefly describes Perl under Mac OS X. =head1 DESCRIPTION -The latest Perl release (5.23.5 as of this writing) builds without changes +The latest Perl release (5.23.6 as of this writing) builds without changes under all versions of Mac OS X from 10.3 "Panther" onwards. In order to build your own version of Perl you will need 'make', diff --git a/README.os2 b/README.os2 index 543090e..94cdf02 100644 --- a/README.os2 +++ b/README.os2 @@ -619,7 +619,7 @@ C in F, see L<"PERLLIB_PREFIX">. =item Additional Perl modules - unzip perl_ste.zip -d f:/perllib/lib/site_perl/5.23.5/ + unzip perl_ste.zip -d f:/perllib/lib/site_perl/5.23.6/ Same remark as above applies. Additionally, if this directory is not one of directories on @INC (and @INC is influenced by C), you diff --git a/README.vms b/README.vms index cab20cb..97b50ae 100644 --- a/README.vms +++ b/README.vms @@ -142,11 +142,11 @@ You may need to set up a foreign symbol for the unpacking utility of choice. Once you have done so, use a command like the following to unpack the archive: - vmstar -xvf perl-5^.23^.5.tar + vmstar -xvf perl-5^.23^.6.tar Then set default to the top-level source directory like so: - set default [.perl-5^.23^.5] + set default [.perl-5^.23^.6] and proceed with configuration as described in the next section. diff --git a/amigaos4/amigaio.c b/amigaos4/amigaio.c index cd99d74..a5eb112 100644 --- a/amigaos4/amigaio.c +++ b/amigaos4/amigaio.c @@ -164,6 +164,7 @@ struct child_arg int amigaos_kill(Pid_t pid, int signal) { int i; + BOOL thistask = FALSE; Pid_t realpid = pid; // Perhaps we have a real pid from else where? /* Look for our DOS pid */ IExec->ObtainSemaphore(&fork_array_sema); @@ -172,12 +173,24 @@ int amigaos_kill(Pid_t pid, int signal) if (pseudo_children[i].ti_pid == pid) { realpid = (Pid_t)IDOS->GetPID(pseudo_children[i].ti_Process,GPID_PROCESS); + if(pseudo_children[i].ti_Process == IExec->FindTask(NULL)) + { + thistask = TRUE; + } break; } } IExec->ReleaseSemaphore(&fork_array_sema); /* Allow the C library to work out which signals are realy valid */ - return kill(realpid,signal); + if(thistask) + { + /* A quirk in newlib kill handling means it's better to call raise() rather than kill on out own task. */ + return raise(signal); + } + else + { + return kill(realpid,signal); + } } static THREAD_RET_TYPE amigaos4_start_child(void *arg) diff --git a/amigaos4/config.sh b/amigaos4/config.sh index 77331f0..17cd6be 100644 --- a/amigaos4/config.sh +++ b/amigaos4/config.sh @@ -15,6 +15,7 @@ libsdirs=' /SDK/newlib/lib' libsfound=' /SDK/newlib/lib/libsocket.a /SDK/newlib/lib/libm.a /SDK/newlib/lib/libc.a' libspath=' /SDK/newlib/lib /SDK/local/newlib/lib' make_set_make='MAKE=/SDK/C/gmake' +pager='/SYS/Utilities/MultiView' perl5='/SDK/Local/C/perl' perlpath='/SDK/Local/C/perl' prefix='/SDK/Local/newlib' diff --git a/autodoc.pl b/autodoc.pl index 4a55c3c..865ee08 100644 --- a/autodoc.pl +++ b/autodoc.pl @@ -417,8 +417,8 @@ whenever this documentation refers to C (and variants of that name, including in function names), it also (essentially transparently) means C. But the ordinals of characters differ between ASCII, EBCDIC, and -the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy more bytes -than in UTF-8. +the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different +number of bytes than in UTF-8. The listing below is alphabetical, case insensitive. diff --git a/caretx.c b/caretx.c index 9366bc4..67b8418 100644 --- a/caretx.c +++ b/caretx.c @@ -53,85 +53,91 @@ void Perl_set_caret_X(pTHX) { GV* tmpgv = gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, SVt_PV); /* $^X */ - if (tmpgv) { - SV *const caret_x = GvSV(tmpgv); + SV *const caret_x = GvSV(tmpgv); #if defined(OS2) - sv_setpv(caret_x, os2_execname(aTHX)); + sv_setpv(caret_x, os2_execname(aTHX)); #else # ifdef USE_KERN_PROC_PATHNAME - size_t size = 0; - int mib[4]; - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_PATHNAME; - mib[3] = -1; - - if (sysctl(mib, 4, NULL, &size, NULL, 0) == 0 - && size > 0 && size < MAXPATHLEN * MAXPATHLEN) { - sv_grow(caret_x, size); - - if (sysctl(mib, 4, SvPVX(caret_x), &size, NULL, 0) == 0 - && size > 2) { - SvPOK_only(caret_x); - SvCUR_set(caret_x, size - 1); - SvTAINT(caret_x); - return; - } + size_t size = 0; + int mib[4]; + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PATHNAME; + mib[3] = -1; + + if (sysctl(mib, 4, NULL, &size, NULL, 0) == 0 + && size > 0 && size < MAXPATHLEN * MAXPATHLEN) { + sv_grow(caret_x, size); + + if (sysctl(mib, 4, SvPVX(caret_x), &size, NULL, 0) == 0 + && size > 2) { + SvPOK_only(caret_x); + SvCUR_set(caret_x, size - 1); + SvTAINT(caret_x); + return; } + } # elif defined(USE_NSGETEXECUTABLEPATH) - char buf[1]; - uint32_t size = sizeof(buf); - - _NSGetExecutablePath(buf, &size); - if (size < MAXPATHLEN * MAXPATHLEN) { - sv_grow(caret_x, size); - if (_NSGetExecutablePath(SvPVX(caret_x), &size) == 0) { - char *const tidied = realpath(SvPVX(caret_x), NULL); - if (tidied) { - sv_setpv(caret_x, tidied); - free(tidied); - } else { - SvPOK_only(caret_x); - SvCUR_set(caret_x, size); - } - return; + char buf[1]; + uint32_t size = sizeof(buf); + + _NSGetExecutablePath(buf, &size); + if (size < MAXPATHLEN * MAXPATHLEN) { + sv_grow(caret_x, size); + if (_NSGetExecutablePath(SvPVX(caret_x), &size) == 0) { + char *const tidied = realpath(SvPVX(caret_x), NULL); + if (tidied) { + sv_setpv(caret_x, tidied); + free(tidied); + } else { + SvPOK_only(caret_x); + SvCUR_set(caret_x, size); } + return; } + } # elif defined(HAS_PROCSELFEXE) - char buf[MAXPATHLEN]; - SSize_t len = readlink(PROCSELFEXE_PATH, buf, sizeof(buf) - 1); - /* NOTE: if the length returned by readlink() is sizeof(buf) - 1, - * it is impossible to know whether the result was truncated. */ + char buf[MAXPATHLEN]; + SSize_t len = readlink(PROCSELFEXE_PATH, buf, sizeof(buf) - 1); + /* NOTE: if the length returned by readlink() is sizeof(buf) - 1, + * it is impossible to know whether the result was truncated. */ - if (len != -1) { - buf[len] = '\0'; - } + if (len != -1) { + buf[len] = '\0'; + } - /* On Playstation2 Linux V1.0 (kernel 2.2.1) readlink(/proc/self/exe) - includes a spurious NUL which will cause $^X to fail in system - or backticks (this will prevent extensions from being built and - many tests from working). readlink is not meant to add a NUL. - Normal readlink works fine. - */ - if (len > 0 && buf[len-1] == '\0') { - len--; - } + /* On Playstation2 Linux V1.0 (kernel 2.2.1) readlink(/proc/self/exe) + includes a spurious NUL which will cause $^X to fail in system + or backticks (this will prevent extensions from being built and + many tests from working). readlink is not meant to add a NUL. + Normal readlink works fine. + */ + if (len > 0 && buf[len-1] == '\0') { + len--; + } - /* FreeBSD's implementation is acknowledged to be imperfect, sometimes - returning the text "unknown" from the readlink rather than the path - to the executable (or returning an error from the readlink). Any - valid path has a '/' in it somewhere, so use that to validate the - result. See http://www.freebsd.org/cgi/query-pr.cgi?pr=35703 - */ - if (len > 0 && memchr(buf, '/', len)) { - sv_setpvn(caret_x, buf, len); - return; - } + /* FreeBSD's implementation is acknowledged to be imperfect, sometimes + returning the text "unknown" from the readlink rather than the path + to the executable (or returning an error from the readlink). Any + valid path has a '/' in it somewhere, so use that to validate the + result. See http://www.freebsd.org/cgi/query-pr.cgi?pr=35703 + */ + if (len > 0 && memchr(buf, '/', len)) { + sv_setpvn(caret_x, buf, len); + return; + } +# elif defined(WIN32) + char *ansi; + WCHAR widename[MAX_PATH]; + GetModuleFileNameW(NULL, widename, sizeof(widename)/sizeof(WCHAR)); + ansi = win32_ansipath(widename); + sv_setpv(caret_x, ansi); + win32_free(ansi); + return; # endif - /* Fallback to this: */ - sv_setpv(caret_x, PL_origargv[0]); + /* Fallback to this: */ + sv_setpv(caret_x, PL_origargv[0]); #endif - } } /* diff --git a/cflags.SH b/cflags.SH index ec70ed2..a50044e 100755 --- a/cflags.SH +++ b/cflags.SH @@ -378,6 +378,29 @@ do esac done +# If usethreads and clang, add -Wthread-safety for clang 3.6 or later. +# gccversion is defined also for clang, because compat, use that for matching. +# Apple overwrites clang version with XCode version, see hints/darwin.sh +# for the gory details. Aggressively forward-proofing. +case "$usethreads" in +define) +case "$gccversion" in +*" Clang 3."[56789]*|*" Clang "[456]*|*"Apple LLVM 6.1"*|*"Apple LLVM "[789]*) + for f in -Wthread-safety + do + case " $warn " in + *" $f "*) ;; # Skip if already there. + *) + echo "cflags.SH: Adding $f because usethreads and clang and gccversion '$gccversion'" + warn="$warn $f" + ;; + esac + done +;; +esac +;; +esac + echo "cflags.SH: cc = $cc" echo "cflags.SH: ccflags = $ccflags" echo "cflags.SH: stdflags = $stdflags" diff --git a/charclass_invlists.h b/charclass_invlists.h index 4316b93..8a37ab6 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -99537,8 +99537,8 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */ * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt - * 2a113118f00b0a9ad6a12eb55b8341a332d8547a8841df2377e938e0fcd1b967 lib/unicore/mktables + * 8e23f7adafce8ef1aadbbb3f1e942c14f5d5c8318599cae7ed0ad555e60d4639 lib/unicore/mktables * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version - * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl + * 996abda3c0fbc2bfd575092af09e3b9b0331e624eb2e969a268457f8fd31ecbb regen/charset_translations.pl * 8a097f8f726bb1619af2f27f149ab87e60a1602f790147e3a561358be16abd27 regen/mk_invlists.pl * ex: set ro: */ diff --git a/config_h.SH b/config_h.SH index aa806cf..01ac23d 100755 --- a/config_h.SH +++ b/config_h.SH @@ -1967,8 +1967,10 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un * LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN * LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN * LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN - * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN - * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE * LONG_DOUBLE_IS_UNKNOWN_FORMAT * It is only defined if the system supports long doubles. */ @@ -1982,9 +1984,14 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un #define LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN 2 #define LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN 3 #define LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN 4 -#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN 5 -#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN 6 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE 5 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE 6 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE 7 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE 8 #define LONG_DOUBLE_IS_UNKNOWN_FORMAT -1 +/* Backward compat. */ +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE #endif /* HAS_LONG_LONG: diff --git a/cpan/CPAN-Meta-Requirements/lib/CPAN/Meta/Requirements.pm b/cpan/CPAN-Meta-Requirements/lib/CPAN/Meta/Requirements.pm index 037ea50..b0e83b0 100644 --- a/cpan/CPAN-Meta-Requirements/lib/CPAN/Meta/Requirements.pm +++ b/cpan/CPAN-Meta-Requirements/lib/CPAN/Meta/Requirements.pm @@ -1,9 +1,10 @@ +use 5.006; # keep at v5.6 for CPAN.pm use strict; use warnings; package CPAN::Meta::Requirements; # ABSTRACT: a set of version requirements for a CPAN dist -our $VERSION = '2.133'; +our $VERSION = '2.140'; #pod =head1 SYNOPSIS #pod @@ -115,7 +116,7 @@ sub _version_object { if (not defined $version or (!ref($version) && $version eq '0')) { return $V0; } - elsif ( ref($version) eq 'version' || _isa_version($version) ) { + elsif ( ref($version) eq 'version' || ( ref($version) && _isa_version($version) ) ) { $vobj = $version; } else { @@ -124,8 +125,14 @@ sub _version_object { my $magic = _find_magic_vstring( $version ); $version = $magic if length $magic; } + # pad to 3 characters if before 5.8.1 and appears to be a v-string + if ( $] < 5.008001 && $version !~ /\A[0-9]/ && substr($version,0,1) ne 'v' && length($version) < 3 ) { + $version .= "\0" x (3 - length($version)); + } eval { local $SIG{__WARN__} = sub { die "Invalid version: $_[0]" }; + # avoid specific segfault on some older version.pm versions + die "Invalid version: $version" if $version eq 'version'; $vobj = version->new($version); }; if ( my $err = $@ ) { @@ -218,7 +225,7 @@ BEGIN { return $self; }; - + no strict 'refs'; *$to_add = $code; } @@ -237,7 +244,7 @@ sub add_minimum { if $self->is_finalized; $self->{requirements}{ $name } = - CPAN::Meta::Requirements::_Range::Range->with_minimum($V0); + CPAN::Meta::Requirements::_Range::Range->with_minimum($V0, $name); } else { $version = $self->_version_object( $name, $version ); @@ -251,9 +258,9 @@ sub add_minimum { #pod #pod $req->add_requirements( $another_req_object ); #pod -#pod This method adds all the requirements in the given CPAN::Meta::Requirements object -#pod to the requirements object on which it was called. If there are any conflicts, -#pod an exception is thrown. +#pod This method adds all the requirements in the given CPAN::Meta::Requirements +#pod object to the requirements object on which it was called. If there are any +#pod conflicts, an exception is thrown. #pod #pod This method returns the requirements object. #pod @@ -330,7 +337,7 @@ sub clear_requirement { #pod the format described in L or undef if the given module has no #pod requirements. This should only be used for informational purposes such as error #pod messages and should not be interpreted or used for comparison (see -#pod L instead.) +#pod L instead). #pod #pod =cut @@ -341,6 +348,25 @@ sub requirements_for_module { return $entry->as_string; } +#pod =method structured_requirements_for_module +#pod +#pod $req->structured_requirements_for_module( $module ); +#pod +#pod This returns a data structure containing the version requirements for a given +#pod module or undef if the given module has no requirements. This should +#pod not be used for version checks (see L instead). +#pod +#pod Added in version 2.134. +#pod +#pod =cut + +sub structured_requirements_for_module { + my ($self, $module) = @_; + my $entry = $self->__entry_for($module); + return unless $entry; + return $entry->as_struct; +} + #pod =method required_modules #pod #pod This method returns a list of all the modules for which requirements have been @@ -378,7 +404,7 @@ sub __modify_entry_for { if $fin and not $old; my $new = ($old || 'CPAN::Meta::Requirements::_Range::Range') - ->$method($version); + ->$method($version, $name); Carp::confess("can't modify finalized requirements") if $fin and $old->as_string ne $new->as_string; @@ -589,36 +615,62 @@ sub from_string_hash { sub as_string { return "== $_[0]{version}" } + sub as_struct { return [ [ '==', "$_[0]{version}" ] ] } + sub as_modifiers { return [ [ exact_version => $_[0]{version} ] ] } + sub _reject_requirements { + my ($self, $module, $error) = @_; + Carp::confess("illegal requirements for $module: $error") + } + sub _clone { (ref $_[0])->_new( version->new( $_[0]{version} ) ) } sub with_exact_version { - my ($self, $version) = @_; + my ($self, $version, $module) = @_; + $module = 'module' unless defined $module; return $self->_clone if $self->_accepts($version); - Carp::confess("illegal requirements: unequal exact version specified"); + $self->_reject_requirements( + $module, + "can't be exactly $version when exact requirement is already $self->{version}", + ); } sub with_minimum { - my ($self, $minimum) = @_; + my ($self, $minimum, $module) = @_; + $module = 'module' unless defined $module; + return $self->_clone if $self->{version} >= $minimum; - Carp::confess("illegal requirements: minimum above exact specification"); + $self->_reject_requirements( + $module, + "minimum $minimum exceeds exact specification $self->{version}", + ); } sub with_maximum { - my ($self, $maximum) = @_; + my ($self, $maximum, $module) = @_; + $module = 'module' unless defined $module; + return $self->_clone if $self->{version} <= $maximum; - Carp::confess("illegal requirements: maximum below exact specification"); + $self->_reject_requirements( + $module, + "maximum $maximum below exact specification $self->{version}", + ); } sub with_exclusion { - my ($self, $exclusion) = @_; + my ($self, $exclusion, $module) = @_; + $module = 'module' unless defined $module; + return $self->_clone unless $exclusion == $self->{version}; - Carp::confess("illegal requirements: excluded exact specification"); + $self->_reject_requirements( + $module, + "tried to exclude $exclusion, which is already exactly specified", + ); } } @@ -655,61 +707,87 @@ sub from_string_hash { return \@mods; } - sub as_string { + sub as_struct { my ($self) = @_; return 0 if ! keys %$self; - return "$self->{minimum}" if (keys %$self) == 1 and exists $self->{minimum}; - my @exclusions = @{ $self->{exclusions} || [] }; my @parts; - for my $pair ( + for my $tuple ( [ qw( >= > minimum ) ], [ qw( <= < maximum ) ], ) { - my ($op, $e_op, $k) = @$pair; + my ($op, $e_op, $k) = @$tuple; if (exists $self->{$k}) { my @new_exclusions = grep { $_ != $self->{ $k } } @exclusions; if (@new_exclusions == @exclusions) { - push @parts, "$op $self->{ $k }"; + push @parts, [ $op, "$self->{ $k }" ]; } else { - push @parts, "$e_op $self->{ $k }"; + push @parts, [ $e_op, "$self->{ $k }" ]; @exclusions = @new_exclusions; } } } - push @parts, map {; "!= $_" } @exclusions; + push @parts, map {; [ "!=", "$_" ] } @exclusions; + + return \@parts; + } + + sub as_string { + my ($self) = @_; + + my @parts = @{ $self->as_struct }; + + return $parts[0][1] if @parts == 1 and $parts[0][0] eq '>='; - return join q{, }, @parts; + return join q{, }, map {; join q{ }, @$_ } @parts; + } + + sub _reject_requirements { + my ($self, $module, $error) = @_; + Carp::confess("illegal requirements for $module: $error") } sub with_exact_version { - my ($self, $version) = @_; + my ($self, $version, $module) = @_; + $module = 'module' unless defined $module; $self = $self->_clone; - Carp::confess("illegal requirements: exact specification outside of range") - unless $self->_accepts($version); + unless ($self->_accepts($version)) { + $self->_reject_requirements( + $module, + "exact specification $version outside of range " . $self->as_string + ); + } return CPAN::Meta::Requirements::_Range::Exact->_new($version); } sub _simplify { - my ($self) = @_; + my ($self, $module) = @_; if (defined $self->{minimum} and defined $self->{maximum}) { if ($self->{minimum} == $self->{maximum}) { - Carp::confess("illegal requirements: excluded all values") - if grep { $_ == $self->{minimum} } @{ $self->{exclusions} || [] }; + if (grep { $_ == $self->{minimum} } @{ $self->{exclusions} || [] }) { + $self->_reject_requirements( + $module, + "minimum and maximum are both $self->{minimum}, which is excluded", + ); + } return CPAN::Meta::Requirements::_Range::Exact->_new($self->{minimum}) } - Carp::confess("illegal requirements: minimum exceeds maximum") - if $self->{minimum} > $self->{maximum}; + if ($self->{minimum} > $self->{maximum}) { + $self->_reject_requirements( + $module, + "minimum $self->{minimum} exceeds maximum $self->{maximum}", + ); + } } # eliminate irrelevant exclusions @@ -728,7 +806,8 @@ sub from_string_hash { } sub with_minimum { - my ($self, $minimum) = @_; + my ($self, $minimum, $module) = @_; + $module = 'module' unless defined $module; $self = $self->_clone; if (defined (my $old_min = $self->{minimum})) { @@ -737,11 +816,12 @@ sub from_string_hash { $self->{minimum} = $minimum; } - return $self->_simplify; + return $self->_simplify($module); } sub with_maximum { - my ($self, $maximum) = @_; + my ($self, $maximum, $module) = @_; + $module = 'module' unless defined $module; $self = $self->_clone; if (defined (my $old_max = $self->{maximum})) { @@ -750,16 +830,17 @@ sub from_string_hash { $self->{maximum} = $maximum; } - return $self->_simplify; + return $self->_simplify($module); } sub with_exclusion { - my ($self, $exclusion) = @_; + my ($self, $exclusion, $module) = @_; + $module = 'module' unless defined $module; $self = $self->_clone; push @{ $self->{exclusions} ||= [] }, $exclusion; - return $self->_simplify; + return $self->_simplify($module); } sub _accepts { @@ -789,7 +870,7 @@ CPAN::Meta::Requirements - a set of version requirements for a CPAN dist =head1 VERSION -version 2.133 +version 2.140 =head1 SYNOPSIS @@ -889,9 +970,9 @@ This method returns the requirements object. $req->add_requirements( $another_req_object ); -This method adds all the requirements in the given CPAN::Meta::Requirements object -to the requirements object on which it was called. If there are any conflicts, -an exception is thrown. +This method adds all the requirements in the given CPAN::Meta::Requirements +object to the requirements object on which it was called. If there are any +conflicts, an exception is thrown. This method returns the requirements object. @@ -926,7 +1007,17 @@ This returns a string containing the version requirements for a given module in the format described in L or undef if the given module has no requirements. This should only be used for informational purposes such as error messages and should not be interpreted or used for comparison (see -L instead.) +L instead). + +=head2 structured_requirements_for_module + + $req->structured_requirements_for_module( $module ); + +This returns a data structure containing the version requirements for a given +module or undef if the given module has no requirements. This should +not be used for version checks (see L instead). + +Added in version 2.134. =head2 required_modules @@ -1043,7 +1134,7 @@ method. =head2 Bugs / Feature Requests Please report any bugs or feature requests through the issue tracker -at L. +at L. You will be notified automatically of any progress on your issue. =head2 Source Code @@ -1051,9 +1142,9 @@ You will be notified automatically of any progress on your issue. This is open source software. The code repository is available for public review and contribution under the terms of the license. -L +L - git clone https://github.com/dagolden/CPAN-Meta-Requirements.git + git clone https://github.com/Perl-Toolchain-Gang/CPAN-Meta-Requirements.git =head1 AUTHORS diff --git a/cpan/CPAN-Meta-Requirements/t/accepts.t b/cpan/CPAN-Meta-Requirements/t/accepts.t index 75bc22f..8a694ea 100644 --- a/cpan/CPAN-Meta-Requirements/t/accepts.t +++ b/cpan/CPAN-Meta-Requirements/t/accepts.t @@ -8,32 +8,32 @@ use Test::More 0.88; { my $req = CPAN::Meta::Requirements->new->add_minimum(Foo => 1); - ok( $req->accepts_module(Foo => 1)); - ok(! $req->accepts_module(Foo => 0)); + ok( $req->accepts_module(Foo => 1), "need 1, got 1"); + ok(! $req->accepts_module(Foo => 0), "need 0, got 1"); } { my $req = CPAN::Meta::Requirements->new->add_minimum(Foo => 0); - ok( $req->accepts_module(Foo => 1)); - ok( $req->accepts_module(Foo => undef)); - ok( $req->accepts_module(Foo => "v0")); - ok( $req->accepts_module(Foo => v1.2.3)); - ok( $req->accepts_module(Foo => "v1.2.3")); + ok( $req->accepts_module(Foo => 1), "need 0, got 1"); + ok( $req->accepts_module(Foo => undef), "need 0, got undef"); + ok( $req->accepts_module(Foo => "v0"), "need 0, got 'v0'"); + ok( $req->accepts_module(Foo => v1.2.3), "need 0, got v1.2.3"); + ok( $req->accepts_module(Foo => "v1.2.3"), "need 0, got 'v1.2.3'"); } { my $req = CPAN::Meta::Requirements->new->add_maximum(Foo => 1); - ok( $req->accepts_module(Foo => 1)); - ok(! $req->accepts_module(Foo => 2)); + ok( $req->accepts_module(Foo => 1), "need <=1, got 1"); + ok(! $req->accepts_module(Foo => 2), "need <=1, got 2"); } { my $req = CPAN::Meta::Requirements->new->add_exclusion(Foo => 1); - ok( $req->accepts_module(Foo => 0)); - ok(! $req->accepts_module(Foo => 1)); + ok( $req->accepts_module(Foo => 0), "need !1, got 0"); + ok(! $req->accepts_module(Foo => 1), "need !1, got 1"); } done_testing; diff --git a/cpan/CPAN-Meta-Requirements/t/bad_version_hook.t b/cpan/CPAN-Meta-Requirements/t/bad_version_hook.t index 5eef7fb..d021466 100644 --- a/cpan/CPAN-Meta-Requirements/t/bad_version_hook.t +++ b/cpan/CPAN-Meta-Requirements/t/bad_version_hook.t @@ -9,6 +9,7 @@ use Test::More 0.88; my %DATA = ( 'Foo::Bar' => [ 10, 10 ], 'Foo::Baz' => [ 'invalid_version', 42 ], + 'Foo::Qux' => [ 'version', 42 ], ); my %input = map { ($_ => $DATA{$_}->[0]) } keys %DATA; my %expected = map { ($_ => $DATA{$_}->[1]) } keys %DATA; @@ -16,6 +17,8 @@ my %expected = map { ($_ => $DATA{$_}->[1]) } keys %DATA; sub dies_ok (&@) { my ($code, $qr, $comment) = @_; + no warnings 'redefine'; + local *Regexp::CARP_TRACE = sub { "" }; my $lived = eval { $code->(); 1 }; if ($lived) { @@ -26,14 +29,18 @@ sub dies_ok (&@) { } my $hook_text; -sub _fixit { my ($v, $m) = @_; $hook_text = $m; return version->new(42) } +sub _fixit { my ($v, $m) = @_; $hook_text .= $m; return version->new(42) } { my $req = CPAN::Meta::Requirements->new( {bad_version_hook => \&_fixit} ); my ($k, $v); - $req->add_minimum($k => $v) while ($k, $v) = each %input; - is $hook_text, 'Foo::Baz', 'hook stored module name'; + while (($k, $v) = each %input) { + note "adding minimum requirement: $k => $v"; + eval { $req->add_minimum($k => $v) }; + is( $@, '', "adding minimum '$k' for $v" ); + } + like( $hook_text, qr/Foo::Baz/, 'hook stored module name' ); is_deeply( $req->as_string_hash, diff --git a/cpan/CPAN-Meta-Requirements/t/basic.t b/cpan/CPAN-Meta-Requirements/t/basic.t index ba029f4..26b252c 100644 --- a/cpan/CPAN-Meta-Requirements/t/basic.t +++ b/cpan/CPAN-Meta-Requirements/t/basic.t @@ -8,6 +8,8 @@ use Test::More 0.88; sub dies_ok (&@) { my ($code, $qr, $comment) = @_; + no warnings 'redefine'; + local *Regexp::CARP_TRACE = sub { "" }; my $lived = eval { $code->(); 1 }; if ($lived) { @@ -126,7 +128,7 @@ sub dies_ok (&@) { $req->add_exclusion(Foo => 1); dies_ok { $req->add_maximum(Foo => 1); } - qr/excluded all/, + qr/both 1, which is excluded/, "can't exclude all values" ; } @@ -142,13 +144,13 @@ sub dies_ok (&@) { my $req = CPAN::Meta::Requirements->new; $req->add_minimum(Foo => 1); dies_ok { $req->add_maximum(Foo => 0.5); } - qr/minimum exceeds maximum/, + qr/minimum 1 exceeds maximum/, "maximum must exceed (or equal) minimum"; $req = CPAN::Meta::Requirements->new; $req->add_maximum(Foo => 0.5); dies_ok { $req->add_minimum(Foo => 1); } - qr/minimum exceeds maximum/, + qr/minimum 1 exceeds maximum/, "maximum must exceed (or equal) minimum"; } @@ -188,6 +190,18 @@ sub dies_ok (&@) { }, 'test exclusion-skipping', ); + + is_deeply( + $req->structured_requirements_for_module('Foo'), + # remember, it's okay to change the exact results, as long as the meaning + # is unchanged -- rjbs, 2012-07-11 + [ + [ '>=', '1' ], + [ '<=', '3' ], + [ '!=', '2' ], + ], + "structured requirements for Foo", + ); } sub foo_1 { @@ -204,21 +218,21 @@ sub foo_1 { is_deeply($req->as_string_hash, { Foo => '== 1' }, "exact requirement"); dies_ok { $req->exact_version(Foo => 2); } - qr/unequal/, + qr/can't be exactly 2.+already/, "can't exactly specify differing versions" ; $req = foo_1; $req->add_minimum(Foo => 0); # ignored $req->add_maximum(Foo => 2); # ignored - dies_ok { $req->add_maximum(Foo => 0); } qr/maximum below/, "max < fixed"; + dies_ok { $req->add_maximum(Foo => 0); } qr/maximum 0 below exact/, "max < fixed"; $req = foo_1; - dies_ok { $req->add_minimum(Foo => 2); } qr/minimum above/, "min > fixed"; + dies_ok { $req->add_minimum(Foo => 2); } qr/minimum 2 exceeds exact/, "min > fixed"; $req = foo_1; $req->add_exclusion(Foo => 8); # ignored - dies_ok { $req->add_exclusion(Foo => 1); } qr/excluded exact/, "!= && =="; + dies_ok { $req->add_exclusion(Foo => 1); } qr/tried to exclude/, "!= && =="; } { @@ -226,6 +240,12 @@ sub foo_1 { is($req->requirements_for_module('Foo'), '== 1', 'requirements_for_module'); + is_deeply( + $req->structured_requirements_for_module('Foo'), + [ [ '==', '1' ] ], + 'structured_requirements_for_module' + ); + # test empty/undef returns my @list = $req->requirements_for_module('FooBarBamBaz'); my $scalar = $req->requirements_for_module('FooBarBamBaz'); diff --git a/cpan/CPAN-Meta-Requirements/t/finalize.t b/cpan/CPAN-Meta-Requirements/t/finalize.t index 58048b5..aa139d3 100644 --- a/cpan/CPAN-Meta-Requirements/t/finalize.t +++ b/cpan/CPAN-Meta-Requirements/t/finalize.t @@ -8,6 +8,8 @@ use Test::More 0.88; sub dies_ok (&@) { my ($code, $qr, $comment) = @_; + no warnings 'redefine'; + local *Regexp::CARP_TRACE = sub { "" }; my $lived = eval { $code->(); 1 }; if ($lived) { diff --git a/cpan/CPAN-Meta-Requirements/t/from-hash.t b/cpan/CPAN-Meta-Requirements/t/from-hash.t index 73ec214..fa5d398 100644 --- a/cpan/CPAN-Meta-Requirements/t/from-hash.t +++ b/cpan/CPAN-Meta-Requirements/t/from-hash.t @@ -8,6 +8,8 @@ use Test::More 0.88; sub dies_ok (&@) { my ($code, $qr, $comment) = @_; + no warnings 'redefine'; + local *Regexp::CARP_TRACE = sub { "" }; my $lived = eval { $code->(); 1 }; if ($lived) { @@ -33,7 +35,9 @@ sub dies_ok (&@) { ); } -{ +SKIP: { + skip "Can't tell v-strings from strings until 5.8.1", 1 + unless $] gt '5.008'; my $string_hash = { Left => 10, Shared => '= 2', @@ -64,7 +68,9 @@ sub dies_ok (&@) { ); } -{ +SKIP: { + skip "Can't tell v-strings from strings until 5.8.1", 2 + unless $] gt '5.008'; my $string_hash = { Left => 10, Shared => v50.44.60, @@ -74,7 +80,8 @@ sub dies_ok (&@) { my $warning; local $SIG{__WARN__} = sub { $warning = join("\n",@_) }; - my $req = CPAN::Meta::Requirements->from_string_hash($string_hash); + my $req = eval { CPAN::Meta::Requirements->from_string_hash($string_hash); }; + is( $@, '', "vstring in string hash lives" ); ok( $req->accepts_module(Shared => 'v50.44.60'), diff --git a/cpan/CPAN-Meta-Requirements/t/merge.t b/cpan/CPAN-Meta-Requirements/t/merge.t index a051356..6610c05 100644 --- a/cpan/CPAN-Meta-Requirements/t/merge.t +++ b/cpan/CPAN-Meta-Requirements/t/merge.t @@ -8,6 +8,8 @@ use Test::More 0.88; sub dies_ok (&@) { my ($code, $qr, $comment) = @_; + no warnings 'redefine'; + local *Regexp::CARP_TRACE = sub { "" }; my $lived = eval { $code->(); 1 }; if ($lived) { diff --git a/cpan/CPAN-Meta-Requirements/t/strings.t b/cpan/CPAN-Meta-Requirements/t/strings.t index 55a28be..da4e4e1 100644 --- a/cpan/CPAN-Meta-Requirements/t/strings.t +++ b/cpan/CPAN-Meta-Requirements/t/strings.t @@ -5,6 +5,8 @@ use Test::More 0.88; sub dies_ok (&@) { my ($code, $qr, $comment) = @_; + no warnings 'redefine'; + local *Regexp::CARP_TRACE = sub { "" }; my $lived = eval { $code->(); 1 }; if ($lived) { diff --git a/cpan/CPAN-Meta-YAML/lib/CPAN/Meta/YAML.pm b/cpan/CPAN-Meta-YAML/lib/CPAN/Meta/YAML.pm index f0c1900..746abd6 100644 --- a/cpan/CPAN-Meta-YAML/lib/CPAN/Meta/YAML.pm +++ b/cpan/CPAN-Meta-YAML/lib/CPAN/Meta/YAML.pm @@ -5,7 +5,7 @@ package CPAN::Meta::YAML; # git description: v1.68-2-gcc5324e # XXX-INGY is 5.8.1 too old/broken for utf8? # XXX-XDG Lancaster consensus was that it was sufficient until # proven otherwise -$CPAN::Meta::YAML::VERSION = '0.017'; # TRIAL +$CPAN::Meta::YAML::VERSION = '0.018'; ; # original $VERSION removed by Doppelgaenger ##################################################################### @@ -878,7 +878,7 @@ CPAN::Meta::YAML - Read and write a subset of YAML for CPAN Meta files =head1 VERSION -version 0.017 +version 0.018 =head1 SYNOPSIS diff --git a/cpan/Locale-Codes/lib/Locale/Codes.pm b/cpan/Locale-Codes/lib/Locale/Codes.pm index fa69aba..118c277 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes.pm @@ -31,7 +31,7 @@ our($VERSION,%Data,%Retired); # $Retired{ TYPE }{ CODESET }{ code }{ CODE } = NAME # { name }{ NAME } = [CODE,NAME] (the key is lowercase) -$VERSION='3.36'; +$VERSION='3.37'; #======================================================================= # @@ -43,14 +43,14 @@ sub _code { return 1 if (@_ > 3); my($type,$code,$codeset) = @_; - $code = '' if (! $code); + $code = '' if (! defined $code); # Determine the codeset $codeset = $ALL_CODESETS{$type}{'default'} if (! defined($codeset) || $codeset eq ''); $codeset = lc($codeset); - return 1 if (! exists $ALL_CODESETS{$type}{'codesets'}{$codeset}); + return (1) if (! exists $ALL_CODESETS{$type}{'codesets'}{$codeset}); return (0,$code,$codeset) if (! $code); # Determine the properties of the codeset @@ -59,20 +59,29 @@ sub _code { if ($op eq 'lc') { $code = lc($code); + return (0,$code,$codeset); + } - } elsif ($op eq 'uc') { + if ($op eq 'uc') { $code = uc($code); + return (0,$code,$codeset); + } - } elsif ($op eq 'ucfirst') { + if ($op eq 'ucfirst') { $code = ucfirst(lc($code)); + return (0,$code,$codeset); + } - } elsif ($op eq 'numeric') { + # uncoverable branch false + if ($op eq 'numeric') { return (1) unless ($code =~ /^\d+$/); my $l = $args[0]; $code = sprintf("%.${l}d", $code); + return (0,$code,$codeset); } - return (0,$code,$codeset); + # uncoverable statement + die "ERROR: codeset not defined correctly: $codeset [$op]\n"; } #======================================================================= @@ -90,8 +99,7 @@ sub _code2name { } my($err,$code,$codeset) = _code($type,@args); - return undef if ($err || - ! defined $code); + return undef if ($err); $code = $Data{$type}{'codealias'}{$codeset}{$code} if (exists $Data{$type}{'codealias'}{$codeset}{$code}); diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Changes.pod b/cpan/Locale-Codes/lib/Locale/Codes/Changes.pod index c5a8ee5..199aa96 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Changes.pod +++ b/cpan/Locale-Codes/lib/Locale/Codes/Changes.pod @@ -30,7 +30,9 @@ codes, I will add them to the module and release a new version. =head1 VERSION 3.38 (planned 2016-03-01; sbeck) -=head1 VERSION 3.37 (planned 2015-12-01; sbeck) +=head1 VERSION 3.37 (2015-12-01; sbeck) + +NEW CODE(s) =head1 VERSION 3.36 (2015-09-01; sbeck) diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Constants.pm b/cpan/Locale-Codes/lib/Locale/Codes/Constants.pm index d4efcfc..44e8f6d 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Constants.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Constants.pm @@ -17,7 +17,7 @@ require Exporter; our($VERSION,@ISA,@EXPORT); our(%ALL_CODESETS); -$VERSION='3.36'; +$VERSION='3.37'; @ISA = qw(Exporter); @EXPORT = qw(LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Country.pm b/cpan/Locale-Codes/lib/Locale/Codes/Country.pm index 4dd6b80..a182431 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Country.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Country.pm @@ -22,7 +22,7 @@ use Locale::Codes::Country_Retired; our($VERSION,@ISA,@EXPORT,@EXPORT_OK); -$VERSION='3.36'; +$VERSION='3.37'; @ISA = qw(Exporter); @EXPORT = qw(code2country country2code diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Country_Codes.pm b/cpan/Locale-Codes/lib/Locale/Codes/Country_Codes.pm index 41e4635..11262e8 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Country_Codes.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Country_Codes.pm @@ -3,7 +3,7 @@ Locale::Codes::Country_Codes; # This file was automatically generated. Any changes to this file will # be lost the next time 'harvest_data' is run. -# Generated on: Tue Sep 1 09:05:01 EDT 2015 +# Generated on: Tue Dec 1 14:41:05 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Data{'country'}{'id'} = '0250'; diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Country_Retired.pm b/cpan/Locale-Codes/lib/Locale/Codes/Country_Retired.pm index c349bb9..c07e44a 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Country_Retired.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Country_Retired.pm @@ -3,7 +3,7 @@ Locale::Codes::Country_Retired; # This file was automatically generated. Any changes to this file will # be lost the next time 'deprecate_codes' is run. -# Generated on: Tue Sep 1 09:20:52 EDT 2015 +# Generated on: Tue Dec 1 14:45:28 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Retired{'country'}{'alpha-2'}{'code'} = { q(an) => q(Netherlands Antilles), diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Currency.pm b/cpan/Locale-Codes/lib/Locale/Codes/Currency.pm index 4a4648c..3d1f406 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Currency.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Currency.pm @@ -22,7 +22,7 @@ use Locale::Codes::Currency_Retired; our($VERSION,@ISA,@EXPORT,@EXPORT_OK); -$VERSION='3.36'; +$VERSION='3.37'; @ISA = qw(Exporter); @EXPORT = qw(code2currency currency2code diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Currency_Codes.pm b/cpan/Locale-Codes/lib/Locale/Codes/Currency_Codes.pm index e5c317d..d1d39b7 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Currency_Codes.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Currency_Codes.pm @@ -3,7 +3,7 @@ Locale::Codes::Currency_Codes; # This file was automatically generated. Any changes to this file will # be lost the next time 'harvest_data' is run. -# Generated on: Tue Sep 1 09:16:18 EDT 2015 +# Generated on: Tue Dec 1 14:44:02 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Data{'currency'}{'id'} = '0177'; diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Currency_Retired.pm b/cpan/Locale-Codes/lib/Locale/Codes/Currency_Retired.pm index 8c50f94..483238f 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Currency_Retired.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Currency_Retired.pm @@ -3,7 +3,7 @@ Locale::Codes::Currency_Retired; # This file was automatically generated. Any changes to this file will # be lost the next time 'deprecate_codes' is run. -# Generated on: Tue Sep 1 09:20:52 EDT 2015 +# Generated on: Tue Dec 1 14:45:28 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Retired{'currency'}{'alpha'}{'code'} = { q(ADP) => q(Andorran Peseta), diff --git a/cpan/Locale-Codes/lib/Locale/Codes/LangExt.pm b/cpan/Locale-Codes/lib/Locale/Codes/LangExt.pm index 6f0910a..6376da2 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/LangExt.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/LangExt.pm @@ -20,7 +20,7 @@ use Locale::Codes::LangExt_Retired; our($VERSION,@ISA,@EXPORT,@EXPORT_OK); -$VERSION='3.36'; +$VERSION='3.37'; @ISA = qw(Exporter); @EXPORT = qw(code2langext langext2code diff --git a/cpan/Locale-Codes/lib/Locale/Codes/LangExt_Codes.pm b/cpan/Locale-Codes/lib/Locale/Codes/LangExt_Codes.pm index 96e48b0..e2e1834 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/LangExt_Codes.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/LangExt_Codes.pm @@ -3,7 +3,7 @@ Locale::Codes::LangExt_Codes; # This file was automatically generated. Any changes to this file will # be lost the next time 'harvest_data' is run. -# Generated on: Tue Sep 1 09:16:39 EDT 2015 +# Generated on: Tue Dec 1 14:44:16 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Data{'langext'}{'id'} = '0230'; diff --git a/cpan/Locale-Codes/lib/Locale/Codes/LangExt_Retired.pm b/cpan/Locale-Codes/lib/Locale/Codes/LangExt_Retired.pm index 394f04e..a0ec2d2 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/LangExt_Retired.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/LangExt_Retired.pm @@ -3,7 +3,7 @@ Locale::Codes::LangExt_Retired; # This file was automatically generated. Any changes to this file will # be lost the next time 'deprecate_codes' is run. -# Generated on: Tue Sep 1 09:20:52 EDT 2015 +# Generated on: Tue Dec 1 14:45:28 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Retired{'langext'}{'alpha'}{'code'} = { q(yds) => q(Yiddish Sign Language), diff --git a/cpan/Locale-Codes/lib/Locale/Codes/LangFam.pm b/cpan/Locale-Codes/lib/Locale/Codes/LangFam.pm index 32e224b..fccec91 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/LangFam.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/LangFam.pm @@ -20,7 +20,7 @@ use Locale::Codes::LangFam_Retired; our($VERSION,@ISA,@EXPORT,@EXPORT_OK); -$VERSION='3.36'; +$VERSION='3.37'; @ISA = qw(Exporter); @EXPORT = qw(code2langfam langfam2code diff --git a/cpan/Locale-Codes/lib/Locale/Codes/LangFam_Codes.pm b/cpan/Locale-Codes/lib/Locale/Codes/LangFam_Codes.pm index 8760a5e..0908061 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/LangFam_Codes.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/LangFam_Codes.pm @@ -3,7 +3,7 @@ Locale::Codes::LangFam_Codes; # This file was automatically generated. Any changes to this file will # be lost the next time 'harvest_data' is run. -# Generated on: Tue Sep 1 09:17:53 EDT 2015 +# Generated on: Tue Dec 1 14:44:25 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Data{'langfam'}{'id'} = '0116'; diff --git a/cpan/Locale-Codes/lib/Locale/Codes/LangFam_Retired.pm b/cpan/Locale-Codes/lib/Locale/Codes/LangFam_Retired.pm index 4478c10..2fce421 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/LangFam_Retired.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/LangFam_Retired.pm @@ -10,7 +10,7 @@ use warnings; require 5.002; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Retired{'langfam'}{'alpha'}{'code'} = { }; diff --git a/cpan/Locale-Codes/lib/Locale/Codes/LangVar.pm b/cpan/Locale-Codes/lib/Locale/Codes/LangVar.pm index 4cd4ec3..23b20cf 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/LangVar.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/LangVar.pm @@ -20,7 +20,7 @@ use Locale::Codes::LangVar_Retired; our($VERSION,@ISA,@EXPORT,@EXPORT_OK); -$VERSION='3.36'; +$VERSION='3.37'; @ISA = qw(Exporter); @EXPORT = qw(code2langvar langvar2code diff --git a/cpan/Locale-Codes/lib/Locale/Codes/LangVar_Codes.pm b/cpan/Locale-Codes/lib/Locale/Codes/LangVar_Codes.pm index 5fb4c59..7613dd0 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/LangVar_Codes.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/LangVar_Codes.pm @@ -3,7 +3,7 @@ Locale::Codes::LangVar_Codes; # This file was automatically generated. Any changes to this file will # be lost the next time 'harvest_data' is run. -# Generated on: Tue Sep 1 09:17:43 EDT 2015 +# Generated on: Tue Dec 1 14:44:22 EST 2015 use strict; require 5.006; @@ -11,9 +11,9 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; -$Locale::Codes::Data{'langvar'}{'id'} = '0072'; +$Locale::Codes::Data{'langvar'}{'id'} = '0073'; $Locale::Codes::Data{'langvar'}{'id2names'} = { q(0001) => [ @@ -152,10 +152,13 @@ $Locale::Codes::Data{'langvar'}{'id2names'} = { q(Nadiza dialect), ], q(0044) => [ + q(Newfoundland English), + ], + q(0045) => [ q(The Gniva dialect of Resian), q(The Njiva dialect of Resian), ], - q(0045) => [ + q(0046) => [ q(Volapuk nulik), q(Volapuk perevidol), q(Volapuk nuladik), @@ -164,89 +167,89 @@ $Locale::Codes::Data{'langvar'}{'id2names'} = { q(Revised Volapuk), q(Modern Volapuk), ], - q(0046) => [ + q(0047) => [ q(The Oseacco dialect of Resian), q(The Osojane dialect of Resian), ], - q(0047) => [ + q(0048) => [ q(Oxford English Dictionary spelling), ], - q(0048) => [ + q(0049) => [ q(Pamaka dialect), ], - q(0049) => [ + q(0050) => [ q(Petrine orthography), ], - q(0050) => [ + q(0051) => [ q(Pinyin romanization), ], - q(0051) => [ + q(0052) => [ q(Polytonic Greek), ], - q(0052) => [ + q(0053) => [ q(Puter idiom of Romansh), ], - q(0053) => [ + q(0054) => [ q(Volapuk rigik), q(Schleyer's Volapuk), q(Original Volapuk), q(Classic Volapuk), ], - q(0054) => [ + q(0055) => [ q(Resian), q(Resianic), q(Rezijan), ], - q(0055) => [ + q(0056) => [ q(Rumantsch Grischun), ], - q(0056) => [ + q(0057) => [ q(Scottish Standard English), ], - q(0057) => [ + q(0058) => [ q(Scouse), ], - q(0058) => [ + q(0059) => [ q(The Stolvizza dialect of Resian), q(The Solbica dialect of Resian), ], - q(0059) => [ + q(0060) => [ q(The Sotavento dialect group of Kabuverdianu), ], - q(0060) => [ + q(0061) => [ q(Surmiran idiom of Romansh), ], - q(0061) => [ + q(0062) => [ q(Sursilvan idiom of Romansh), ], - q(0062) => [ + q(0063) => [ q(Sutsilvan idiom of Romansh), ], - q(0063) => [ + q(0064) => [ q(Belarusian in Taraskievica orthography), ], - q(0064) => [ + q(0065) => [ q(Unified Cornish orthography of Revived Cornish), ], - q(0065) => [ + q(0066) => [ q(Unified Cornish Revised orthography of Revived Cornish), ], - q(0066) => [ + q(0067) => [ q(Ulster dialect of Scots), ], - q(0067) => [ + q(0068) => [ q(Unifon phonetic alphabet), ], - q(0068) => [ + q(0069) => [ q(Vedic Sanskrit), ], - q(0069) => [ + q(0070) => [ q(Valencian), ], - q(0070) => [ + q(0071) => [ q(Vallader idiom of Romansh), ], - q(0071) => [ + q(0072) => [ q(Wade-Giles romanization), ], }; @@ -269,7 +272,7 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(1), ], q(belarusian in taraskievica orthography) => [ - q(0063), + q(0064), q(0), ], q(biscayan dialect of basque) => [ @@ -289,7 +292,7 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(classic volapuk) => [ - q(0053), + q(0054), q(3), ], q(classical sanskrit) => [ @@ -301,7 +304,7 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(de jong's volapuk) => [ - q(0045), + q(0046), q(3), ], q(early modern english (1500-1700)) => [ @@ -349,7 +352,7 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(modern volapuk) => [ - q(0045), + q(0046), q(6), ], q(monotonic greek) => [ @@ -369,15 +372,19 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(new volapuk) => [ - q(0045), + q(0046), q(4), ], + q(newfoundland english) => [ + q(0044), + q(0), + ], q(norwegian in hognorsk (high norwegian) orthography) => [ q(0029), q(0), ], q(original volapuk) => [ - q(0053), + q(0054), q(2), ], q(orthographic formulation of 1943 - official in brazil (formulario ortografico de 1943 - oficial no brasil)) => [ @@ -385,23 +392,23 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(oxford english dictionary spelling) => [ - q(0047), + q(0048), q(0), ], q(pamaka dialect) => [ - q(0048), + q(0049), q(0), ], q(petrine orthography) => [ - q(0049), + q(0050), q(0), ], q(pinyin romanization) => [ - q(0050), + q(0051), q(0), ], q(polytonic greek) => [ - q(0051), + q(0052), q(0), ], q(portuguese language orthographic agreement of 1990 (acordo ortografico da lingua portuguesa de 1990)) => [ @@ -417,39 +424,39 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(puter idiom of romansh) => [ - q(0052), + q(0053), q(0), ], q(resian) => [ - q(0054), + q(0055), q(0), ], q(resianic) => [ - q(0054), + q(0055), q(1), ], q(revised volapuk) => [ - q(0045), + q(0046), q(5), ], q(rezijan) => [ - q(0054), + q(0055), q(2), ], q(rumantsch grischun) => [ - q(0055), + q(0056), q(0), ], q(schleyer's volapuk) => [ - q(0053), + q(0054), q(1), ], q(scottish standard english) => [ - q(0056), + q(0057), q(0), ], q(scouse) => [ - q(0057), + q(0058), q(0), ], q(serbian with ekavian pronunciation) => [ @@ -481,15 +488,15 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(surmiran idiom of romansh) => [ - q(0060), + q(0061), q(0), ], q(sursilvan idiom of romansh) => [ - q(0061), + q(0062), q(0), ], q(sutsilvan idiom of romansh) => [ - q(0062), + q(0063), q(0), ], q(the balanka dialect of anii) => [ @@ -505,7 +512,7 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(1), ], q(the gniva dialect of resian) => [ - q(0044), + q(0045), q(0), ], q(the kociewie dialect of polish) => [ @@ -521,15 +528,15 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(1), ], q(the njiva dialect of resian) => [ - q(0044), + q(0045), q(1), ], q(the oseacco dialect of resian) => [ - q(0046), + q(0047), q(0), ], q(the osojane dialect of resian) => [ - q(0046), + q(0047), q(1), ], q(the san giorgio dialect of resian) => [ @@ -537,15 +544,15 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(the solbica dialect of resian) => [ - q(0058), + q(0059), q(1), ], q(the sotavento dialect group of kabuverdianu) => [ - q(0059), + q(0060), q(0), ], q(the stolvizza dialect of resian) => [ - q(0058), + q(0059), q(0), ], q(traditional german orthography) => [ @@ -553,15 +560,15 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(ulster dialect of scots) => [ - q(0066), + q(0067), q(0), ], q(unified cornish orthography of revived cornish) => [ - q(0064), + q(0065), q(0), ], q(unified cornish revised orthography of revived cornish) => [ - q(0065), + q(0066), q(0), ], q(unified turkic latin alphabet (historical)) => [ @@ -569,7 +576,7 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(unifon phonetic alphabet) => [ - q(0067), + q(0068), q(0), ], q(uralic phonetic alphabet) => [ @@ -577,35 +584,35 @@ $Locale::Codes::Data{'langvar'}{'alias2id'} = { q(0), ], q(valencian) => [ - q(0069), + q(0070), q(0), ], q(vallader idiom of romansh) => [ - q(0070), + q(0071), q(0), ], q(vedic sanskrit) => [ - q(0068), + q(0069), q(0), ], q(volapuk nuladik) => [ - q(0045), + q(0046), q(2), ], q(volapuk nulik) => [ - q(0045), + q(0046), q(0), ], q(volapuk perevidol) => [ - q(0045), + q(0046), q(1), ], q(volapuk rigik) => [ - q(0053), + q(0054), q(0), ], q(wade-giles romanization) => [ - q(0071), + q(0072), q(0), ], q(western armenian) => [ @@ -792,118 +799,122 @@ $Locale::Codes::Data{'langvar'}{'code2id'} = { q(0043), q(0), ], - q(njiva) => [ + q(newfound) => [ q(0044), q(0), ], - q(nulik) => [ + q(njiva) => [ q(0045), q(0), ], - q(osojs) => [ + q(nulik) => [ q(0046), q(0), ], - q(oxendict) => [ + q(osojs) => [ q(0047), q(0), ], - q(pamaka) => [ + q(oxendict) => [ q(0048), q(0), ], - q(petr1708) => [ + q(pamaka) => [ q(0049), q(0), ], - q(pinyin) => [ + q(petr1708) => [ q(0050), q(0), ], - q(polyton) => [ + q(pinyin) => [ q(0051), q(0), ], - q(puter) => [ + q(polyton) => [ q(0052), q(0), ], - q(rigik) => [ + q(puter) => [ q(0053), q(0), ], - q(rozaj) => [ + q(rigik) => [ q(0054), q(0), ], - q(rumgr) => [ + q(rozaj) => [ q(0055), q(0), ], - q(scotland) => [ + q(rumgr) => [ q(0056), q(0), ], - q(scouse) => [ + q(scotland) => [ q(0057), q(0), ], - q(solba) => [ + q(scouse) => [ q(0058), q(0), ], - q(sotav) => [ + q(solba) => [ q(0059), q(0), ], - q(surmiran) => [ + q(sotav) => [ q(0060), q(0), ], - q(sursilv) => [ + q(surmiran) => [ q(0061), q(0), ], - q(sutsilv) => [ + q(sursilv) => [ q(0062), q(0), ], - q(tarask) => [ + q(sutsilv) => [ q(0063), q(0), ], - q(uccor) => [ + q(tarask) => [ q(0064), q(0), ], - q(ucrcor) => [ + q(uccor) => [ q(0065), q(0), ], - q(ulster) => [ + q(ucrcor) => [ q(0066), q(0), ], - q(unifon) => [ + q(ulster) => [ q(0067), q(0), ], - q(vaidika) => [ + q(unifon) => [ q(0068), q(0), ], - q(valencia) => [ + q(vaidika) => [ q(0069), q(0), ], - q(vallader) => [ + q(valencia) => [ q(0070), q(0), ], - q(wadegile) => [ + q(vallader) => [ q(0071), q(0), ], + q(wadegile) => [ + q(0072), + q(0), + ], }, }; @@ -952,34 +963,35 @@ $Locale::Codes::Data{'langvar'}{'id2code'} = { q(0041) => q(monoton), q(0042) => q(ndyuka), q(0043) => q(nedis), - q(0044) => q(njiva), - q(0045) => q(nulik), - q(0046) => q(osojs), - q(0047) => q(oxendict), - q(0048) => q(pamaka), - q(0049) => q(petr1708), - q(0050) => q(pinyin), - q(0051) => q(polyton), - q(0052) => q(puter), - q(0053) => q(rigik), - q(0054) => q(rozaj), - q(0055) => q(rumgr), - q(0056) => q(scotland), - q(0057) => q(scouse), - q(0058) => q(solba), - q(0059) => q(sotav), - q(0060) => q(surmiran), - q(0061) => q(sursilv), - q(0062) => q(sutsilv), - q(0063) => q(tarask), - q(0064) => q(uccor), - q(0065) => q(ucrcor), - q(0066) => q(ulster), - q(0067) => q(unifon), - q(0068) => q(vaidika), - q(0069) => q(valencia), - q(0070) => q(vallader), - q(0071) => q(wadegile), + q(0044) => q(newfound), + q(0045) => q(njiva), + q(0046) => q(nulik), + q(0047) => q(osojs), + q(0048) => q(oxendict), + q(0049) => q(pamaka), + q(0050) => q(petr1708), + q(0051) => q(pinyin), + q(0052) => q(polyton), + q(0053) => q(puter), + q(0054) => q(rigik), + q(0055) => q(rozaj), + q(0056) => q(rumgr), + q(0057) => q(scotland), + q(0058) => q(scouse), + q(0059) => q(solba), + q(0060) => q(sotav), + q(0061) => q(surmiran), + q(0062) => q(sursilv), + q(0063) => q(sutsilv), + q(0064) => q(tarask), + q(0065) => q(uccor), + q(0066) => q(ucrcor), + q(0067) => q(ulster), + q(0068) => q(unifon), + q(0069) => q(vaidika), + q(0070) => q(valencia), + q(0071) => q(vallader), + q(0072) => q(wadegile), }, }; diff --git a/cpan/Locale-Codes/lib/Locale/Codes/LangVar_Retired.pm b/cpan/Locale-Codes/lib/Locale/Codes/LangVar_Retired.pm index f888ab4..3ebef84 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/LangVar_Retired.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/LangVar_Retired.pm @@ -3,7 +3,7 @@ Locale::Codes::LangVar_Retired; # This file was automatically generated. Any changes to this file will # be lost the next time 'deprecate_codes' is run. -# Generated on: Tue Sep 1 09:20:52 EDT 2015 +# Generated on: Tue Dec 1 14:45:28 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Retired{'langvar'}{'alpha'}{'code'} = { }; diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Language.pm b/cpan/Locale-Codes/lib/Locale/Codes/Language.pm index 4ff25e3..4109d6c 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Language.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Language.pm @@ -22,7 +22,7 @@ use Locale::Codes::Language_Retired; our($VERSION,@ISA,@EXPORT,@EXPORT_OK); -$VERSION='3.36'; +$VERSION='3.37'; @ISA = qw(Exporter); @EXPORT = qw(code2language language2code diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Language_Codes.pm b/cpan/Locale-Codes/lib/Locale/Codes/Language_Codes.pm index 5a82490..51abf1b 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Language_Codes.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Language_Codes.pm @@ -3,7 +3,7 @@ Locale::Codes::Language_Codes; # This file was automatically generated. Any changes to this file will # be lost the next time 'harvest_data' is run. -# Generated on: Tue Sep 1 09:05:17 EDT 2015 +# Generated on: Tue Dec 1 14:41:14 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Data{'language'}{'id'} = '7976'; diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Language_Retired.pm b/cpan/Locale-Codes/lib/Locale/Codes/Language_Retired.pm index dc8a635..98908dd 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Language_Retired.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Language_Retired.pm @@ -3,7 +3,7 @@ Locale::Codes::Language_Retired; # This file was automatically generated. Any changes to this file will # be lost the next time 'deprecate_codes' is run. -# Generated on: Tue Sep 1 09:20:52 EDT 2015 +# Generated on: Tue Dec 1 14:45:28 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Retired{'language'}{'alpha-2'}{'code'} = { q(in) => q(Indonesian), diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Script.pm b/cpan/Locale-Codes/lib/Locale/Codes/Script.pm index d181863..b188cf0 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Script.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Script.pm @@ -22,7 +22,7 @@ use Locale::Codes::Script_Retired; our($VERSION,@ISA,@EXPORT,@EXPORT_OK); -$VERSION='3.36'; +$VERSION='3.37'; @ISA = qw(Exporter); @EXPORT = qw(code2script script2code diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Script_Codes.pm b/cpan/Locale-Codes/lib/Locale/Codes/Script_Codes.pm index 96e1824..451a4e4 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Script_Codes.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Script_Codes.pm @@ -3,7 +3,7 @@ Locale::Codes::Script_Codes; # This file was automatically generated. Any changes to this file will # be lost the next time 'harvest_data' is run. -# Generated on: Tue Sep 1 09:16:30 EDT 2015 +# Generated on: Tue Dec 1 14:44:11 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Data{'script'}{'id'} = '0175'; diff --git a/cpan/Locale-Codes/lib/Locale/Codes/Script_Retired.pm b/cpan/Locale-Codes/lib/Locale/Codes/Script_Retired.pm index 0047021..89a0f47 100644 --- a/cpan/Locale-Codes/lib/Locale/Codes/Script_Retired.pm +++ b/cpan/Locale-Codes/lib/Locale/Codes/Script_Retired.pm @@ -3,7 +3,7 @@ Locale::Codes::Script_Retired; # This file was automatically generated. Any changes to this file will # be lost the next time 'deprecate_codes' is run. -# Generated on: Tue Sep 1 09:20:52 EDT 2015 +# Generated on: Tue Dec 1 14:45:28 EST 2015 use strict; require 5.006; @@ -11,7 +11,7 @@ use warnings; use utf8; our($VERSION); -$VERSION='3.36'; +$VERSION='3.37'; $Locale::Codes::Retired{'script'}{'alpha'}{'code'} = { }; diff --git a/cpan/Locale-Codes/lib/Locale/Country.pm b/cpan/Locale-Codes/lib/Locale/Country.pm index 889c57b..4c4d7c1 100644 --- a/cpan/Locale-Codes/lib/Locale/Country.pm +++ b/cpan/Locale-Codes/lib/Locale/Country.pm @@ -10,7 +10,7 @@ use warnings; use Exporter; our $VERSION; -$VERSION='3.36'; +$VERSION='3.37'; our (@ISA,@EXPORT); diff --git a/cpan/Locale-Codes/lib/Locale/Currency.pm b/cpan/Locale-Codes/lib/Locale/Currency.pm index cc2cd24..c930ff9 100644 --- a/cpan/Locale-Codes/lib/Locale/Currency.pm +++ b/cpan/Locale-Codes/lib/Locale/Currency.pm @@ -10,7 +10,7 @@ use warnings; use Exporter; our $VERSION; -$VERSION='3.36'; +$VERSION='3.37'; our (@ISA,@EXPORT); diff --git a/cpan/Locale-Codes/lib/Locale/Language.pm b/cpan/Locale-Codes/lib/Locale/Language.pm index 2861f96..ab4d214 100644 --- a/cpan/Locale-Codes/lib/Locale/Language.pm +++ b/cpan/Locale-Codes/lib/Locale/Language.pm @@ -10,7 +10,7 @@ use warnings; use Exporter; our $VERSION; -$VERSION='3.36'; +$VERSION='3.37'; our (@ISA,@EXPORT); diff --git a/cpan/Locale-Codes/lib/Locale/Script.pm b/cpan/Locale-Codes/lib/Locale/Script.pm index e9e4ecf..1ea0e18 100644 --- a/cpan/Locale-Codes/lib/Locale/Script.pm +++ b/cpan/Locale-Codes/lib/Locale/Script.pm @@ -10,7 +10,7 @@ use warnings; use Exporter; our $VERSION; -$VERSION='3.36'; +$VERSION='3.37'; our (@ISA,@EXPORT); diff --git a/cpan/Locale-Codes/t/code2country.t b/cpan/Locale-Codes/t/code2country.t index bd83e48..4fb69c9 100644 --- a/cpan/Locale-Codes/t/code2country.t +++ b/cpan/Locale-Codes/t/code2country.t @@ -129,6 +129,14 @@ zr ~ _undef_ zr retired ~ Zaire +jp alpha-2 not_retired other_arg ~ _undef_ + +jp _blank_ ~ Japan + +jp alpha-15 ~ _undef_ + +jp alpha-2 retired ~ Japan + "; print "code2country...\n"; diff --git a/cpan/Math-BigInt-FastCalc/FastCalc.xs b/cpan/Math-BigInt-FastCalc/FastCalc.xs index a045c71..eb228e4 100644 --- a/cpan/Math-BigInt-FastCalc/FastCalc.xs +++ b/cpan/Math-BigInt-FastCalc/FastCalc.xs @@ -14,8 +14,8 @@ # define croak_xs_usage croak #endif -double XS_BASE = 0; -double XS_BASE_LEN = 0; +static double XS_BASE = 0; +static double XS_BASE_LEN = 0; MODULE = Math::BigInt::FastCalc PACKAGE = Math::BigInt::FastCalc diff --git a/cpan/Math-BigInt-FastCalc/lib/Math/BigInt/FastCalc.pm b/cpan/Math-BigInt-FastCalc/lib/Math/BigInt/FastCalc.pm index 487d8ad..c73f7c4 100644 --- a/cpan/Math-BigInt-FastCalc/lib/Math/BigInt/FastCalc.pm +++ b/cpan/Math-BigInt-FastCalc/lib/Math/BigInt/FastCalc.pm @@ -6,7 +6,7 @@ use warnings; use Math::BigInt::Calc 1.999706; -our $VERSION = '0.37'; +our $VERSION = '0.38'; ############################################################################## # global constants, flags and accessory diff --git a/cpan/Module-Metadata/lib/Module/Metadata.pm b/cpan/Module-Metadata/lib/Module/Metadata.pm index cc05549..f7017cf 100644 --- a/cpan/Module-Metadata/lib/Module/Metadata.pm +++ b/cpan/Module-Metadata/lib/Module/Metadata.pm @@ -1,6 +1,6 @@ # -*- mode: cperl; tab-width: 8; indent-tabs-mode: nil; basic-offset: 2 -*- # vim:ts=8:sw=2:et:sta:sts=2 -package Module::Metadata; # git description: v1.000029-6-gae0d3b6 +package Module::Metadata; # git description: v1.000030-2-g52f466c # ABSTRACT: Gather package and POD information from perl module files # Adapted from Perl-licensed code originally distributed with @@ -14,7 +14,7 @@ sub __clean_eval { eval $_[0] } use strict; use warnings; -our $VERSION = '1.000030'; # TRIAL +our $VERSION = '1.000031'; # TRIAL use Carp qw/croak/; use File::Spec; @@ -820,7 +820,7 @@ Module::Metadata - Gather package and POD information from perl module files =head1 VERSION -version 1.000030 +version 1.000031 =head1 SYNOPSIS diff --git a/cpan/Module-Metadata/t/extract-version.t b/cpan/Module-Metadata/t/extract-version.t index f1d8d21..278a602 100644 --- a/cpan/Module-Metadata/t/extract-version.t +++ b/cpan/Module-Metadata/t/extract-version.t @@ -669,19 +669,19 @@ foreach my $test_case (@modules) { ok( $test_case->{all_versions}->($pm_info->{versions}), "case '$test_case->{name}': all extracted versions passes match sub" - ) or diag 'found versions: ', explain $pm_info->{versions}; + ); } else { is_deeply( $pm_info->{versions}, $test_case->{all_versions}, 'correctly found all $VERSIONs', - ) or diag 'found versions: ', explain $pm_info->{versions}; + ); } } is( $warnings, '', "case '$test_case->{name}': no warnings from parsing" ) or $errs++; - diag Dumper({ got => $pm_info->version, module_contents => $code }) if $errs; + diag 'extracted versions: ', explain({ got => $pm_info->{versions}, module_contents => $code }) if !$ENV{PERL_CORE} && $errs; } } continue { diff --git a/cpan/Pod-Perldoc/lib/Pod/Perldoc.pm b/cpan/Pod-Perldoc/lib/Pod/Perldoc.pm index 08c443e..84f6624 100644 --- a/cpan/Pod-Perldoc/lib/Pod/Perldoc.pm +++ b/cpan/Pod-Perldoc/lib/Pod/Perldoc.pm @@ -12,7 +12,7 @@ use File::Spec::Functions qw(catfile catdir splitdir); use vars qw($VERSION @Pagers $Bindir $Pod2man $Temp_Files_Created $Temp_File_Lifetime ); -$VERSION = '3.25_01'; # patched in perl5.git +$VERSION = '3.25_02'; # patched in perl5.git $VERSION =~ s/_//; #.......................................................................... @@ -1664,6 +1664,10 @@ sub pagers_guessing { push @pagers, qw( less.exe more.com< ); unshift @pagers, $ENV{PAGER} if $ENV{PAGER}; } + elsif ( $self->is_amigaos) { + push @pagers, qw( /SYS/Utilities/MultiView /SYS/Utilities/More /C/TYPE ); + unshift @pagers, "$ENV{PAGER}" if $ENV{PAGER}; + } else { if ($self->is_os2) { unshift @pagers, 'less', 'cmd /c more <'; diff --git a/cpan/Term-ANSIColor/lib/Term/ANSIColor.pm b/cpan/Term-ANSIColor/lib/Term/ANSIColor.pm index 2101a87..ace4d47 100644 --- a/cpan/Term-ANSIColor/lib/Term/ANSIColor.pm +++ b/cpan/Term-ANSIColor/lib/Term/ANSIColor.pm @@ -1,7 +1,7 @@ # Term::ANSIColor -- Color screen output using ANSI escape sequences. # # Copyright 1996, 1997, 1998, 2000, 2001, 2002, 2005, 2006, 2008, 2009, 2010, -# 2011, 2012, 2013, 2014 Russ Allbery +# 2011, 2012, 2013, 2014, 2015 Russ Allbery # Copyright 1996 Zenin # Copyright 2012 Kurt Starsinic # @@ -40,7 +40,7 @@ our $AUTOLOAD; # against circular module loading (not that we load any modules, but # consistency is good). BEGIN { - $VERSION = '4.03'; + $VERSION = '4.04'; # All of the basic supported constants, used in %EXPORT_TAGS. my @colorlist = qw( @@ -475,7 +475,7 @@ sub colored { # empty segments, and then colorize each of the line sections. if (defined($EACHLINE)) { my @text = map { ($_ ne $EACHLINE) ? $attr . $_ . "\e[0m" : $_ } - grep { length($_) > 0 } + grep { length > 0 } split(m{ (\Q$EACHLINE\E) }xms, $string); return join(q{}, @text); } else { @@ -536,9 +536,9 @@ sub colorstrip { # Returns: True if all the attributes are valid, false otherwise. sub colorvalid { my (@codes) = @_; - @codes = map { split(q{ }, lc($_)) } @codes; + @codes = map { split(q{ }, lc) } @codes; for my $code (@codes) { - if (!defined($ATTRIBUTES{$code}) && !defined($ALIASES{$code})) { + if (!(defined($ATTRIBUTES{$code}) || defined($ALIASES{$code}))) { return; } } @@ -635,9 +635,9 @@ particular features and the versions of Perl that included them. =head2 Supported Colors -Terminal emulators that support color divide into two types: ones that +Terminal emulators that support color divide into three types: ones that support only eight colors, ones that support sixteen, and ones that -support 256. This module provides the ANSI escape codes all of them. +support 256. This module provides the ANSI escape codes for all of them. These colors are referred to as ANSI colors 0 through 7 (normal), 8 through 15 (16-color), and 16 through 255 (256-color). @@ -1190,9 +1190,13 @@ voice solutions. =head1 COPYRIGHT AND LICENSE -Copyright 1996 Zenin. Copyright 1996, 1997, 1998, 2000, 2001, 2002, 2005, -2006, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Russ Allbery -. Copyright 2012 Kurt Starsinic . +Copyright 1996 Zenin + +Copyright 1996, 1997, 1998, 2000, 2001, 2002, 2005, 2006, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015 Russ Allbery + +Copyright 2012 Kurt Starsinic + This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. diff --git a/cpan/Text-ParseWords/t/ParseWords.t b/cpan/Text-ParseWords/t/ParseWords.t index 3fb7ac6..905ea00 100644 --- a/cpan/Text-ParseWords/t/ParseWords.t +++ b/cpan/Text-ParseWords/t/ParseWords.t @@ -1,122 +1,122 @@ -#!./perl - -use warnings; -use Text::ParseWords; -use Test::More tests => 27; - -@words = shellwords(qq(foo "bar quiz" zoo)); -is($words[0], 'foo'); -is($words[1], 'bar quiz'); -is($words[2], 'zoo'); - -{ - # Gonna get some undefined things back - no warnings 'uninitialized' ; - - # Test quotewords() with other parameters and null last field - @words = quotewords(':+', 1, 'foo:::"bar:foo":zoo zoo:'); - is(join(";", @words), qq(foo;"bar:foo";zoo zoo;)); -} - -# Test $keep eq 'delimiters' and last field zero -@words = quotewords('\s+', 'delimiters', '4 3 2 1 0'); -is(join(";", @words), qq(4; ;3; ;2; ;1; ;0)); - -# Big ol' nasty test (thanks, Joerk!) -$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd" eee\\\\\\"ffff" "gg"'; - -# First with $keep == 1 -$result = join('|', parse_line('\s+', 1, $string)); -is($result, 'aaaa"bbbbb"|cc\\ cc|\\\\\\"dddd" eee\\\\\\"ffff"|"gg"'); - -# Now, $keep == 0 -$result = join('|', parse_line('\s+', 0, $string)); -is($result, 'aaaabbbbb|cc cc|\\"dddd eee\\"ffff|gg'); - -# Now test single quote behavior -$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd\' eee\\\\\\"ffff\' gg'; -$result = join('|', parse_line('\s+', 0, $string)); -is($result, 'aaaabbbbb|cc cc|\\"dddd eee\\\\\\"ffff|gg'); - -# Make sure @nested_quotewords does the right thing -@lists = nested_quotewords('\s+', 0, 'a b c', '1 2 3', 'x y z'); -is (@lists, 3); -is (@{$lists[0]}, 3); -is (@{$lists[1]}, 3); -is (@{$lists[2]}, 3); - -# Now test error return -$string = 'foo bar baz"bach blech boop'; - -@words = shellwords($string); -is(@words, 0); - -@words = parse_line('s+', 0, $string); -is(@words, 0); - -@words = quotewords('s+', 0, $string); -is(@words, 0); - -{ - # Gonna get some more undefined things back - no warnings 'uninitialized' ; - - @words = nested_quotewords('s+', 0, $string); - is(@words, 0); - - # Now test empty fields - $result = join('|', parse_line(':', 0, 'foo::0:"":::')); - is($result, 'foo||0||||'); - - # Test for 0 in quotes without $keep - $result = join('|', parse_line(':', 0, ':"0":')); - is($result, '|0|'); - - # Test for \001 in quoted string - $result = join('|', parse_line(':', 0, ':"' . "\001" . '":')); - is($result, "|\1|"); - -} - -# Now test perlish single quote behavior -$Text::ParseWords::PERL_SINGLE_QUOTE = 1; -$string = 'aaaa"bbbbb" cc\ cc \\\\\"dddd\' eee\\\\\"\\\'ffff\' gg'; -$result = join('|', parse_line('\s+', 0, $string)); -is($result, 'aaaabbbbb|cc cc|\"dddd eee\\\\"\'ffff|gg'); - -# test whitespace in the delimiters -@words = quotewords(' ', 1, '4 3 2 1 0'); -is(join(";", @words), qq(4;3;2;1;0)); - -# [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text -$string = qq{"field1" "field2\\\nstill field2" "field3"}; - -$result = join('|', parse_line("\t", 1, $string)); -is($result, qq{"field1"|"field2\\\nstill field2"|"field3"}); - -$result = join('|', parse_line("\t", 0, $string)); -is($result, "field1|field2\nstill field2|field3"); - -SKIP: { # unicode - skip "No unicode",1 if $]<5.008; - $string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"}; - $result = join('|', parse_line("\x{1234}", 0, $string)); - is($result, "field1|field2\x{1234}still field2|field3",'Unicode'); -} - -# missing quote after matching regex used to hang after change #22997 -"1234" =~ /(1)(2)(3)(4)/; -$string = qq{"missing quote}; -$result = join('|', shellwords($string)); -is($result, ""); - -# make sure shellwords strips out leading whitespace and trailng undefs -# from parse_line, so it's behavior is more like /bin/sh -$result = join('|', shellwords(" aa \\ \\ bb ", " \\ ", "cc dd ee\\ ")); -is($result, "aa| | bb| |cc|dd|ee "); - -$SIG{ALRM} = sub {die "Timeout!"}; -alarm(3); -@words = Text::ParseWords::old_shellwords("foo\\"); -is(@words, 1); -alarm(0); +#!./perl + +use warnings; +use Text::ParseWords; +use Test::More tests => 27; + +@words = shellwords(qq(foo "bar quiz" zoo)); +is($words[0], 'foo'); +is($words[1], 'bar quiz'); +is($words[2], 'zoo'); + +{ + # Gonna get some undefined things back + no warnings 'uninitialized' ; + + # Test quotewords() with other parameters and null last field + @words = quotewords(':+', 1, 'foo:::"bar:foo":zoo zoo:'); + is(join(";", @words), qq(foo;"bar:foo";zoo zoo;)); +} + +# Test $keep eq 'delimiters' and last field zero +@words = quotewords('\s+', 'delimiters', '4 3 2 1 0'); +is(join(";", @words), qq(4; ;3; ;2; ;1; ;0)); + +# Big ol' nasty test (thanks, Joerk!) +$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd" eee\\\\\\"ffff" "gg"'; + +# First with $keep == 1 +$result = join('|', parse_line('\s+', 1, $string)); +is($result, 'aaaa"bbbbb"|cc\\ cc|\\\\\\"dddd" eee\\\\\\"ffff"|"gg"'); + +# Now, $keep == 0 +$result = join('|', parse_line('\s+', 0, $string)); +is($result, 'aaaabbbbb|cc cc|\\"dddd eee\\"ffff|gg'); + +# Now test single quote behavior +$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd\' eee\\\\\\"ffff\' gg'; +$result = join('|', parse_line('\s+', 0, $string)); +is($result, 'aaaabbbbb|cc cc|\\"dddd eee\\\\\\"ffff|gg'); + +# Make sure @nested_quotewords does the right thing +@lists = nested_quotewords('\s+', 0, 'a b c', '1 2 3', 'x y z'); +is (@lists, 3); +is (@{$lists[0]}, 3); +is (@{$lists[1]}, 3); +is (@{$lists[2]}, 3); + +# Now test error return +$string = 'foo bar baz"bach blech boop'; + +@words = shellwords($string); +is(@words, 0); + +@words = parse_line('s+', 0, $string); +is(@words, 0); + +@words = quotewords('s+', 0, $string); +is(@words, 0); + +{ + # Gonna get some more undefined things back + no warnings 'uninitialized' ; + + @words = nested_quotewords('s+', 0, $string); + is(@words, 0); + + # Now test empty fields + $result = join('|', parse_line(':', 0, 'foo::0:"":::')); + is($result, 'foo||0||||'); + + # Test for 0 in quotes without $keep + $result = join('|', parse_line(':', 0, ':"0":')); + is($result, '|0|'); + + # Test for \001 in quoted string + $result = join('|', parse_line(':', 0, ':"' . "\001" . '":')); + is($result, "|\1|"); + +} + +# Now test perlish single quote behavior +$Text::ParseWords::PERL_SINGLE_QUOTE = 1; +$string = 'aaaa"bbbbb" cc\ cc \\\\\"dddd\' eee\\\\\"\\\'ffff\' gg'; +$result = join('|', parse_line('\s+', 0, $string)); +is($result, 'aaaabbbbb|cc cc|\"dddd eee\\\\"\'ffff|gg'); + +# test whitespace in the delimiters +@words = quotewords(' ', 1, '4 3 2 1 0'); +is(join(";", @words), qq(4;3;2;1;0)); + +# [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text +$string = qq{"field1" "field2\\\nstill field2" "field3"}; + +$result = join('|', parse_line("\t", 1, $string)); +is($result, qq{"field1"|"field2\\\nstill field2"|"field3"}); + +$result = join('|', parse_line("\t", 0, $string)); +is($result, "field1|field2\nstill field2|field3"); + +SKIP: { # unicode + skip "No unicode",1 if $]<5.008; + $string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"}; + $result = join('|', parse_line("\x{1234}", 0, $string)); + is($result, "field1|field2\x{1234}still field2|field3",'Unicode'); +} + +# missing quote after matching regex used to hang after change #22997 +"1234" =~ /(1)(2)(3)(4)/; +$string = qq{"missing quote}; +$result = join('|', shellwords($string)); +is($result, ""); + +# make sure shellwords strips out leading whitespace and trailng undefs +# from parse_line, so it's behavior is more like /bin/sh +$result = join('|', shellwords(" aa \\ \\ bb ", " \\ ", "cc dd ee\\ ")); +is($result, "aa| | bb| |cc|dd|ee "); + +$SIG{ALRM} = sub {die "Timeout!"}; +alarm(3); +@words = Text::ParseWords::old_shellwords("foo\\"); +is(@words, 1); +alarm(0); diff --git a/cpan/Unicode-Normalize/Makefile.PL b/cpan/Unicode-Normalize/Makefile.PL index d25e92d..44a4b8d 100644 --- a/cpan/Unicode-Normalize/Makefile.PL +++ b/cpan/Unicode-Normalize/Makefile.PL @@ -38,6 +38,7 @@ WriteMakefile( warnings => 0, SelectSaver => 0, }, + ($mm_ver < 6.48 ? () : MIN_PERL_VERSION => 5.6.0), ($mm_ver < 6.46 ? () : (META_MERGE => { 'meta-spec' => { version => 2 }, resources => { diff --git a/cpan/Unicode-Normalize/Normalize.pm b/cpan/Unicode-Normalize/Normalize.pm index 62c02b1..61b04ea 100644 --- a/cpan/Unicode-Normalize/Normalize.pm +++ b/cpan/Unicode-Normalize/Normalize.pm @@ -16,7 +16,7 @@ use Carp; no warnings 'utf8'; -our $VERSION = '1.23'; +our $VERSION = '1.24'; our $PACKAGE = __PACKAGE__; our @EXPORT = qw( NFC NFD NFKC NFKD ); @@ -597,6 +597,8 @@ Currently maintained by Copyright(C) 2001-2012, SADAHIRO Tomoyuki. Japan. All rights reserved. +=head1 LICENSE + This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/cpan/Unicode-Normalize/Normalize.xs b/cpan/Unicode-Normalize/Normalize.xs index 36e20b0..fd67a19 100644 --- a/cpan/Unicode-Normalize/Normalize.xs +++ b/cpan/Unicode-Normalize/Normalize.xs @@ -810,8 +810,8 @@ isComp2nd(uv) ALIAS: isNFC_MAYBE = 1 isNFKC_MAYBE = 2 - - + INIT: + PERL_UNUSED_VAR(ix); SV* isNFD_NO(uv) diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm index e884298..ace9b78 100644 --- a/dist/Data-Dumper/Dumper.pm +++ b/dist/Data-Dumper/Dumper.pm @@ -10,7 +10,7 @@ package Data::Dumper; BEGIN { - $VERSION = '2.158'; # Don't forget to set version and release + $VERSION = '2.159'; # Don't forget to set version and release } # date in POD below! #$| = 1; @@ -41,6 +41,7 @@ my $IS_ASCII = ord 'A' == 65; # module vars and their defaults $Indent = 2 unless defined $Indent; +$Trailingcomma = 0 unless defined $Trailingcomma; $Purity = 0 unless defined $Purity; $Pad = "" unless defined $Pad; $Varname = "VAR" unless defined $Varname; @@ -76,6 +77,7 @@ sub new { my($s) = { level => 0, # current recursive depth indent => $Indent, # various styles of indenting + trailingcomma => $Trailingcomma, # whether to add comma after last elem pad => $Pad, # all lines prefixed by this string xpad => "", # padding-per-level apad => "", # added padding for hash keys n such @@ -413,7 +415,9 @@ sub _dump { $out .= $pad . $ipad . '#' . $i if $s->{indent} >= 3; $out .= $pad . $ipad . $s->_dump($v, $sname); - $out .= "," if $i++ < $#$val; + $out .= "," + if $i++ < $#$val + || ($s->{trailingcomma} && $s->{indent} >= 1); } $out .= $pad . ($s->{xpad} x ($s->{level} - 1)) if $i; $out .= ($name =~ /^\@/) ? ')' : ']'; @@ -473,7 +477,7 @@ sub _dump { if $s->{indent} >= 2; } if (substr($out, -1) eq ',') { - chop $out; + chop $out if !$s->{trailingcomma} || !$s->{indent}; $out .= $pad . ($s->{xpad} x ($s->{level} - 1)); } $out .= ($name =~ /^\%/) ? ')' : '}'; @@ -633,6 +637,11 @@ sub Indent { } } +sub Trailingcomma { + my($s, $v) = @_; + defined($v) ? (($s->{trailingcomma} = $v), return $s) : $s->{trailingcomma}; +} + sub Pair { my($s, $v) = @_; defined($v) ? (($s->{pair} = $v), return $s) : $s->{pair}; @@ -1032,6 +1041,15 @@ consumes twice the number of lines). Style 2 is the default. =item * +$Data::Dumper::Trailingcomma I I<$OBJ>->Trailingcomma(I<[NEWVAL]>) + +Controls whether a comma is added after the last element of an array or +hash. Even when true, no comma is added between the last element of an array +or hash and a closing bracket when they appear on the same line. The default +is false. + +=item * + $Data::Dumper::Purity I I<$OBJ>->Purity(I<[NEWVAL]>) Controls the degree to which the output can be Ced to recreate the @@ -1454,7 +1472,7 @@ modify it under the same terms as Perl itself. =head1 VERSION -Version 2.158 (March 13 2015) +Version 2.159 (December 15 2015) =head1 SEE ALSO diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs index 97277f4..3440891 100644 --- a/dist/Data-Dumper/Dumper.xs +++ b/dist/Data-Dumper/Dumper.xs @@ -41,6 +41,30 @@ || (((UV) (c)) >= '0' && ((UV) (c)) <= '9')) #endif +/* This struct contains almost all the user's desired configuration, and it + * is treated as constant by the recursive function. This arrangement has + * the advantage of needing less memory than passing all of them on the + * stack all the time (as was the case in an earlier implementation). */ +typedef struct { + SV *pad; + SV *xpad; + SV *sep; + SV *pair; + SV *sortkeys; + SV *freezer; + SV *toaster; + SV *bless; + IV maxrecurse; + I32 indent; + I32 purity; + I32 deepcopy; + I32 quotekeys; + I32 maxdepth; + I32 useqq; + int use_sparse_seen_hash; + int trailingcomma; +} Style; + static I32 num_q (const char *s, STRLEN slen); static I32 esc_q (char *dest, const char *src, STRLEN slen); static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq); @@ -49,11 +73,8 @@ static bool key_needs_quote(const char *s, STRLEN len); static bool safe_decimal_number(const char *p, STRLEN len); static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n); static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, - HV *seenhv, AV *postav, I32 *levelp, I32 indent, - SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair, - SV *freezer, SV *toaster, - I32 purity, I32 deepcopy, I32 quotekeys, SV *bless, - I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq, IV maxrecurse); + HV *seenhv, AV *postav, const I32 level, SV *apad, + const Style *style); #ifndef HvNAME_get #define HvNAME_get HvNAME @@ -491,10 +512,7 @@ sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n) */ static I32 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, - AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad, - SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity, - I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys, - int use_sparse_seen_hash, I32 useqq, IV maxrecurse) + AV *postav, const I32 level, SV *apad, const Style *style) { char tmpbuf[128]; Size_t i; @@ -537,14 +555,14 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, /* If a freeze method is provided and the object has it, call it. Warn on errors. */ - if (SvOBJECT(SvRV(val)) && freezer && - SvPOK(freezer) && SvCUR(freezer) && - gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), - SvCUR(freezer), -1) != NULL) + if (SvOBJECT(SvRV(val)) && style->freezer && + SvPOK(style->freezer) && SvCUR(style->freezer) && + gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(style->freezer), + SvCUR(style->freezer), -1) != NULL) { dSP; ENTER; SAVETMPS; PUSHMARK(sp); XPUSHs(val); PUTBACK; - i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID|G_DISCARD); + i = perl_call_method(SvPVX_const(style->freezer), G_EVAL|G_VOID|G_DISCARD); SPAGAIN; if (SvTRUE(ERRSV)) warn("WARNING(Freezer method call failed): %"SVf"", ERRSV); @@ -575,7 +593,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)) { - if (purity && *levelp > 0) { + if (style->purity && level > 0) { SV *postentry; if (realtype == SVt_PVHV) @@ -662,7 +680,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, * representation of the thing we are currently examining * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)'). */ - if (!purity && maxdepth > 0 && *levelp >= maxdepth) { + if (!style->purity && style->maxdepth > 0 && level >= style->maxdepth) { STRLEN vallen; const char * const valstr = SvPV(val,vallen); sv_catpvs(retval, "'"); @@ -671,24 +689,23 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, return 1; } - if (maxrecurse > 0 && *levelp >= maxrecurse) { - croak("Recursion limit of %" IVdf " exceeded", maxrecurse); + if (style->maxrecurse > 0 && level >= style->maxrecurse) { + croak("Recursion limit of %" IVdf " exceeded", style->maxrecurse); } if (realpack && !no_bless) { /* we have a blessed ref */ STRLEN blesslen; - const char * const blessstr = SvPV(bless, blesslen); + const char * const blessstr = SvPV(style->bless, blesslen); sv_catpvn(retval, blessstr, blesslen); sv_catpvs(retval, "( "); - if (indent >= 2) { + if (style->indent >= 2) { blesspad = apad; apad = newSVsv(apad); sv_x(aTHX_ apad, " ", 1, blesslen+2); } } - (*levelp)++; - ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp); + ipad = sv_x(aTHX_ Nullsv, SvPVX_const(style->xpad), SvCUR(style->xpad), level+1); if (is_regex) { @@ -759,19 +776,13 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, if (realpack) { /* blessed */ sv_catpvs(retval, "do{\\(my $o = "); DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, - postav, levelp, indent, pad, xpad, apad, sep, pair, - freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash, useqq, - maxrecurse); + postav, level+1, apad, style); sv_catpvs(retval, ")}"); } /* plain */ else { sv_catpvs(retval, "\\"); DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, - postav, levelp, indent, pad, xpad, apad, sep, pair, - freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash, useqq, - maxrecurse); + postav, level+1, apad, style); } SvREFCNT_dec(namesv); } @@ -781,10 +792,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catpvs(namesv, "}"); sv_catpvs(retval, "\\"); DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, - postav, levelp, indent, pad, xpad, apad, sep, pair, - freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash, useqq, - maxrecurse); + postav, level+1, apad, style); SvREFCNT_dec(namesv); } else if (realtype == SVt_PVAV) { @@ -824,8 +832,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, iname[inamelen++] = '-'; iname[inamelen++] = '>'; } iname[inamelen++] = '['; iname[inamelen] = '\0'; - totpad = newSVsv(sep); - sv_catsv(totpad, pad); + totpad = newSVsv(style->sep); + sv_catsv(totpad, style->pad); sv_catsv(totpad, apad); for (ix = 0; ix <= ixmax; ++ix) { @@ -846,7 +854,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix); #endif iname[ilen++] = ']'; iname[ilen] = '\0'; - if (indent >= 3) { + if (style->indent >= 3) { sv_catsv(retval, totpad); sv_catsv(retval, ipad); sv_catpvs(retval, "#"); @@ -855,15 +863,12 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catsv(retval, totpad); sv_catsv(retval, ipad); DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav, - levelp, indent, pad, xpad, apad, sep, pair, - freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash, - useqq, maxrecurse); - if (ix < ixmax) + level+1, apad, style); + if (ix < ixmax || (style->trailingcomma && style->indent >= 1)) sv_catpvs(retval, ","); } if (ixmax >= 0) { - SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1); + SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(style->xpad), SvCUR(style->xpad), level); sv_catsv(retval, totpad); sv_catsv(retval, opad); SvREFCNT_dec(opad); @@ -909,16 +914,14 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catpvs(iname, "->"); } sv_catpvs(iname, "{"); - totpad = newSVsv(sep); - sv_catsv(totpad, pad); + totpad = newSVsv(style->sep); + sv_catsv(totpad, style->pad); sv_catsv(totpad, apad); /* If requested, get a sorted/filtered array of hash keys */ - if (sortkeys) { - if (sortkeys == &PL_sv_yes) { -#if PERL_VERSION < 8 - sortkeys = sv_2mortal(newSVpvs("Data::Dumper::_sortkeys")); -#else + if (style->sortkeys) { +#if PERL_VERSION >= 8 + if (style->sortkeys == &PL_sv_yes) { keys = newAV(); (void)hv_iterinit((HV*)ival); while ((entry = hv_iternext((HV*)ival))) { @@ -939,17 +942,18 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, } else # endif -#endif { sortsv(AvARRAY(keys), av_len(keys)+1, Perl_sv_cmp); } } - if (sortkeys != &PL_sv_yes) { + else +#endif + { dSP; ENTER; SAVETMPS; PUSHMARK(sp); XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK; - i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL); + i = perl_call_sv(style->sortkeys, G_SCALAR | G_EVAL); SPAGAIN; if (i) { sv = POPs; @@ -976,7 +980,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, I32 nlen; bool do_utf8 = FALSE; - if (sortkeys) { + if (style->sortkeys) { if (!(keys && (SSize_t)i <= av_len(keys))) break; } else { if (!(entry = hv_iternext((HV *)ival))) break; @@ -985,7 +989,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, if (i) sv_catpvs(retval, ","); - if (sortkeys) { + if (style->sortkeys) { char *key; svp = av_fetch(keys, i, FALSE); keysv = svp ? *svp : sv_newmortal(); @@ -1022,10 +1026,10 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, their handling of key quoting compatible between XS and perl. */ - if (quotekeys || key_needs_quote(key,keylen)) { - if (do_utf8 || useqq) { + if (style->quotekeys || key_needs_quote(key,keylen)) { + if (do_utf8 || style->useqq) { STRLEN ocur = SvCUR(retval); - nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq); + nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, style->useqq); nkey = SvPVX(retval) + ocur; } else { @@ -1052,8 +1056,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catpvn(sname, nkey, nlen); sv_catpvs(sname, "}"); - sv_catsv(retval, pair); - if (indent >= 2) { + sv_catsv(retval, style->pair); + if (style->indent >= 2) { char *extra; I32 elen = 0; newapad = newSVsv(apad); @@ -1068,17 +1072,17 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, newapad = apad; DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv, - postav, levelp, indent, pad, xpad, newapad, sep, pair, - freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash, useqq, - maxrecurse); + postav, level+1, newapad, style); SvREFCNT_dec(sname); Safefree(nkey_buffer); - if (indent >= 2) + if (style->indent >= 2) SvREFCNT_dec(newapad); } if (i) { - SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1); + SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(style->xpad), + SvCUR(style->xpad), level); + if (style->trailingcomma && style->indent >= 1) + sv_catpvs(retval, ","); sv_catsv(retval, totpad); sv_catsv(retval, opad); SvREFCNT_dec(opad); @@ -1092,7 +1096,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, } else if (realtype == SVt_PVCV) { sv_catpvs(retval, "sub { \"DUMMY\" }"); - if (purity) + if (style->purity) warn("Encountered CODE ref, using dummy placeholder"); } else { @@ -1103,7 +1107,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, I32 plen; I32 pticks; - if (indent >= 2) { + if (style->indent >= 2) { SvREFCNT_dec(apad); apad = blesspad; } @@ -1127,14 +1131,13 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catpvn(retval, realpack, strlen(realpack)); } sv_catpvs(retval, "' )"); - if (toaster && SvPOK(toaster) && SvCUR(toaster)) { + if (style->toaster && SvPOK(style->toaster) && SvCUR(style->toaster)) { sv_catpvs(retval, "->"); - sv_catsv(retval, toaster); + sv_catsv(retval, style->toaster); sv_catpvs(retval, "()"); } } SvREFCNT_dec(ipad); - (*levelp)--; } else { STRLEN i; @@ -1168,7 +1171,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, * there is no other reference, duh. This is an optimization. * Note that we'd have to check for weak-refs, too, but this is * already the branch for non-refs only. */ - else if (val != &PL_sv_undef && (!use_sparse_seen_hash || SvREFCNT(val) > 1)) { + else if (val != &PL_sv_undef && (!style->use_sparse_seen_hash || SvREFCNT(val) > 1)) { SV * const namesv = newSVpvs("\\"); sv_catpvn(namesv, name, namelen); seenentry = newAV(); @@ -1219,7 +1222,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, r = SvPVX(retval)+SvCUR(retval); r[0] = '*'; r[1] = '{'; SvCUR_set(retval, SvCUR(retval)+2); - esc_q_utf8(aTHX_ retval, c, i, 1, useqq); + esc_q_utf8(aTHX_ retval, c, i, 1, style->useqq); sv_grow(retval, SvCUR(retval)+2); r = SvPVX(retval)+SvCUR(retval); r[0] = '}'; r[1] = '\0'; @@ -1245,7 +1248,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, } SvCUR_set(retval, SvCUR(retval)+i); - if (purity) { + if (style->purity) { static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" }; static const STRLEN sizes[] = { 8, 7, 6 }; SV *e; @@ -1262,7 +1265,6 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, continue; { - I32 nlevel = 0; SV *postentry = newSVpvn(r,i); sv_setsv(nname, postentry); @@ -1272,15 +1274,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, e = newRV_inc(e); SvCUR_set(newapad, 0); - if (indent >= 2) + if (style->indent >= 2) (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry)); DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry, - seenhv, postav, &nlevel, indent, pad, xpad, - newapad, sep, pair, freezer, toaster, purity, - deepcopy, quotekeys, bless, maxdepth, - sortkeys, use_sparse_seen_hash, useqq, - maxrecurse); + seenhv, postav, 0, newapad, style); SvREFCNT_dec(e); } } @@ -1315,11 +1313,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, * the pure perl code. * see [perl #74798] */ - if (useqq && safe_decimal_number(c, i)) { + if (style->useqq && safe_decimal_number(c, i)) { sv_catsv(retval, val); } - else if (DO_UTF8(val) || useqq) - i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq); + else if (DO_UTF8(val) || style->useqq) + i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), style->useqq); else { sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */ r = SvPVX(retval) + SvCUR(retval); @@ -1334,7 +1332,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, } if (idlen) { - if (deepcopy) + if (style->deepcopy) (void)hv_delete(seenhv, id, idlen, G_DISCARD); else if (namelen && seenentry) { SV *mark = *av_fetch(seenentry, 2, TRUE); @@ -1363,17 +1361,15 @@ Data_Dumper_Dumpxs(href, ...) SV *retval, *valstr; HV *seenhv = NULL; AV *postav, *todumpav, *namesav; - I32 level = 0; - I32 indent, terse, useqq; + I32 terse = 0; SSize_t i, imax, postlen; SV **svp; - SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname; - SV *freezer, *toaster, *bless, *sortkeys; - I32 purity, deepcopy, quotekeys, maxdepth = 0; - IV maxrecurse = 1000; + SV *apad = &PL_sv_undef; + Style style; + + SV *name, *val = &PL_sv_undef, *varname = &PL_sv_undef; char tmpbuf[1024]; I32 gimme = GIMME_V; - int use_sparse_seen_hash = 0; if (!SvROK(href)) { /* call new to get an object first */ if (items < 2) @@ -1402,13 +1398,15 @@ Data_Dumper_Dumpxs(href, ...) } todumpav = namesav = NULL; + style.indent = 2; + style.quotekeys = 1; + style.maxrecurse = 1000; + style.purity = style.deepcopy = style.useqq = style.maxdepth + = style.use_sparse_seen_hash = style.trailingcomma = 0; + style.pad = style.xpad = style.sep = style.pair = style.sortkeys + = style.freezer = style.toaster = style.bless = &PL_sv_undef; seenhv = NULL; - val = pad = xpad = apad = sep = pair = varname - = freezer = toaster = bless = sortkeys = &PL_sv_undef; name = sv_newmortal(); - indent = 2; - terse = purity = deepcopy = useqq = 0; - quotekeys = 1; retval = newSVpvs(""); if (SvROK(href) @@ -1418,57 +1416,66 @@ Data_Dumper_Dumpxs(href, ...) if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp)) seenhv = (HV*)SvRV(*svp); else - use_sparse_seen_hash = 1; + style.use_sparse_seen_hash = 1; if ((svp = hv_fetch(hv, "noseen", 6, FALSE))) - use_sparse_seen_hash = (SvOK(*svp) && SvIV(*svp) != 0); + style.use_sparse_seen_hash = (SvOK(*svp) && SvIV(*svp) != 0); if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp)) todumpav = (AV*)SvRV(*svp); if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp)) namesav = (AV*)SvRV(*svp); if ((svp = hv_fetch(hv, "indent", 6, FALSE))) - indent = SvIV(*svp); + style.indent = SvIV(*svp); if ((svp = hv_fetch(hv, "purity", 6, FALSE))) - purity = SvIV(*svp); + style.purity = SvIV(*svp); if ((svp = hv_fetch(hv, "terse", 5, FALSE))) terse = SvTRUE(*svp); if ((svp = hv_fetch(hv, "useqq", 5, FALSE))) - useqq = SvTRUE(*svp); + style.useqq = SvTRUE(*svp); if ((svp = hv_fetch(hv, "pad", 3, FALSE))) - pad = *svp; + style.pad = *svp; if ((svp = hv_fetch(hv, "xpad", 4, FALSE))) - xpad = *svp; + style.xpad = *svp; if ((svp = hv_fetch(hv, "apad", 4, FALSE))) apad = *svp; if ((svp = hv_fetch(hv, "sep", 3, FALSE))) - sep = *svp; + style.sep = *svp; if ((svp = hv_fetch(hv, "pair", 4, FALSE))) - pair = *svp; + style.pair = *svp; if ((svp = hv_fetch(hv, "varname", 7, FALSE))) varname = *svp; if ((svp = hv_fetch(hv, "freezer", 7, FALSE))) - freezer = *svp; + style.freezer = *svp; if ((svp = hv_fetch(hv, "toaster", 7, FALSE))) - toaster = *svp; + style.toaster = *svp; if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE))) - deepcopy = SvTRUE(*svp); + style.deepcopy = SvTRUE(*svp); if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE))) - quotekeys = SvTRUE(*svp); + style.quotekeys = SvTRUE(*svp); + if ((svp = hv_fetch(hv, "trailingcomma", 13, FALSE))) + style.trailingcomma = SvTRUE(*svp); if ((svp = hv_fetch(hv, "bless", 5, FALSE))) - bless = *svp; + style.bless = *svp; if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE))) - maxdepth = SvIV(*svp); + style.maxdepth = SvIV(*svp); if ((svp = hv_fetch(hv, "maxrecurse", 10, FALSE))) - maxrecurse = SvIV(*svp); + style.maxrecurse = SvIV(*svp); if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) { - sortkeys = *svp; - if (! SvTRUE(sortkeys)) - sortkeys = NULL; - else if (! (SvROK(sortkeys) && - SvTYPE(SvRV(sortkeys)) == SVt_PVCV) ) - { - /* flag to use qsortsv() for sorting hash keys */ - sortkeys = &PL_sv_yes; - } + SV *sv = *svp; + if (! SvTRUE(sv)) + style.sortkeys = NULL; + else if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV) + style.sortkeys = sv; + else if (PERL_VERSION < 8) + /* 5.6 doesn't make sortsv() available to XS code, + * so we must use this helper instead. Note that we + * always allocate this mortal SV, but it will be + * used only if at least one hash is encountered + * while dumping recursively; an older version + * allocated it lazily as needed. */ + style.sortkeys = sv_2mortal(newSVpvs("Data::Dumper::_sortkeys")); + else + /* flag to use sortsv() for sorting hash keys */ + style.sortkeys = &PL_sv_yes; } postav = newAV(); @@ -1525,7 +1532,7 @@ Data_Dumper_Dumpxs(href, ...) sv_catpvn(name, tmpbuf, nchars); } - if (indent >= 2 && !terse) { + if (style.indent >= 2 && !terse) { SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3); newapad = newSVsv(apad); sv_catsv(newapad, tmpsv); @@ -1536,13 +1543,10 @@ Data_Dumper_Dumpxs(href, ...) PUTBACK; DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv, - postav, &level, indent, pad, xpad, newapad, sep, pair, - freezer, toaster, purity, deepcopy, quotekeys, - bless, maxdepth, sortkeys, use_sparse_seen_hash, - useqq, maxrecurse); + postav, 0, newapad, &style); SPAGAIN; - if (indent >= 2 && !terse) + if (style.indent >= 2 && !terse) SvREFCNT_dec(newapad); postlen = av_len(postav); @@ -1551,12 +1555,12 @@ Data_Dumper_Dumpxs(href, ...) sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name)); sv_catpvs(valstr, ";"); } - sv_catsv(retval, pad); + sv_catsv(retval, style.pad); sv_catsv(retval, valstr); - sv_catsv(retval, sep); + sv_catsv(retval, style.sep); if (postlen >= 0) { SSize_t i; - sv_catsv(retval, pad); + sv_catsv(retval, style.pad); for (i = 0; i <= postlen; ++i) { SV *elem; svp = av_fetch(postav, i, FALSE); @@ -1564,13 +1568,13 @@ Data_Dumper_Dumpxs(href, ...) sv_catsv(retval, elem); if (i < postlen) { sv_catpvs(retval, ";"); - sv_catsv(retval, sep); - sv_catsv(retval, pad); + sv_catsv(retval, style.sep); + sv_catsv(retval, style.pad); } } } sv_catpvs(retval, ";"); - sv_catsv(retval, sep); + sv_catsv(retval, style.sep); } sv_setpvn(valstr, "", 0); if (gimme == G_ARRAY) { diff --git a/dist/Data-Dumper/t/trailing_comma.t b/dist/Data-Dumper/t/trailing_comma.t new file mode 100644 index 0000000..8767bdf --- /dev/null +++ b/dist/Data-Dumper/t/trailing_comma.t @@ -0,0 +1,116 @@ +#!./perl -w +# t/trailing_comma.t - Test TrailingComma() + +BEGIN { + if ($ENV{PERL_CORE}){ + require Config; import Config; + no warnings 'once'; + if ($Config{'extensions'} !~ /\bData\/Dumper\b/) { + print "1..0 # Skip: Data::Dumper was not built\n"; + exit 0; + } + } +} + +use strict; + +use Data::Dumper; +use Test::More; +use lib qw( ./t/lib ); +use Testing qw( _dumptostr ); + +my @cases = ({ + input => [], + output => "[]", + desc => 'empty array', +}, { + input => [17], + output => "[17]", + desc => 'single-element array, no indent', + conf => { Indent => 0 }, +}, { + input => [17], + output => "[\n 17,\n]", + desc => 'single-element array, indent=1', + conf => { Indent => 1 }, +}, { + input => [17], + output => "[\n 17,\n ]", + desc => 'single-element array, indent=2', + conf => { Indent => 2 }, +}, { + input => [17, 18], + output => "[17,18]", + desc => 'two-element array, no indent', + conf => { Indent => 0 }, +}, { + input => [17, 18], + output => "[\n 17,\n 18,\n]", + desc => 'two-element array, indent=1', + conf => { Indent => 1 }, +}, { + input => [17, 18], + output => "[\n 17,\n 18,\n ]", + desc => 'two-element array, indent=2', + conf => { Indent => 2 }, +}, { + input => {}, + output => "{}", + desc => 'empty hash', +}, { + input => {foo => 17}, + output => "{'foo' => 17}", + desc => 'single-element hash, no indent', + conf => { Indent => 0 }, +}, { + input => {foo => 17}, + output => "{\n 'foo' => 17,\n}", + desc => 'single-element hash, indent=1', + conf => { Indent => 1 }, +}, { + input => {foo => 17}, + output => "{\n 'foo' => 17,\n }", + desc => 'single-element hash, indent=2', + conf => { Indent => 2 }, +}, { + input => {foo => 17, quux => 18}, + output => "{'foo' => 17,'quux' => 18}", + desc => 'two-element hash, no indent', + conf => { Indent => 0 }, +}, { + input => {foo => 17, quux => 18}, + output => "{\n 'foo' => 17,\n 'quux' => 18,\n}", + desc => 'two-element hash, indent=1', + conf => { Indent => 1 }, +}, { + input => {foo => 17, quux => 18}, + output => "{\n 'foo' => 17,\n 'quux' => 18,\n }", + desc => 'two-element hash, indent=2', + conf => { Indent => 2 }, +}); + +my $xs_available = !$Data::Dumper::Useperl; +my $tests_per_case = $xs_available ? 2 : 1; + +plan tests => $tests_per_case * @cases; + +for my $case (@cases) { + run_case($case, $xs_available ? 'XS' : 'PP'); + if ($xs_available) { + local $Data::Dumper::Useperl = 1; + run_case($case, 'PP'); + } +} + +sub run_case { + my ($case, $mode) = @_; + my ($input, $output, $desc, $conf) = @$case{qw}; + my $obj = Data::Dumper->new([$input]); + $obj->Trailingcomma(1); # default to on for these tests + $obj->Sortkeys(1); + for my $k (sort keys %{ $conf || {} }) { + $obj->$k($conf->{$k}); + } + chomp(my $got = _dumptostr($obj)); + is($got, "\$VAR1 = $output;", "$desc (in $mode mode)"); +} diff --git a/dist/ExtUtils-CBuilder/t/00-have-compiler.t b/dist/ExtUtils-CBuilder/t/00-have-compiler.t index 1661812..1073277 100644 --- a/dist/ExtUtils-CBuilder/t/00-have-compiler.t +++ b/dist/ExtUtils-CBuilder/t/00-have-compiler.t @@ -26,14 +26,34 @@ ok( $b, "got CBuilder object" ) or diag $@; # test missing compiler { -my $b1 = ExtUtils::CBuilder->new(quiet => 1); -configure_fake_missing_compilers($b1); -is( $b1->have_compiler, 0, "have_compiler: fake missing cc" ); + + my $b1 = ExtUtils::CBuilder->new(quiet => 1); + + configure_fake_missing_compilers($b1); + + # This will fork a child that will print + # 'Can't exec "djaadjfkadjkfajdf"' + # or similar on STDERR; so make sure fd2 is temporarily closed before + # the fork + open(my $orig_err, ">&", \*STDERR) or die "Can't dup STDERR: $!"; + close(STDERR); + my $res = $b1->have_compiler; + open(STDERR, ">&", $orig_err) or die "Can't dup \$orig_err $!"; + close($orig_err); + + is($res, 0, "have_compiler: fake missing cc" ); } { -my $b2 = ExtUtils::CBuilder->new(quiet => 1); -configure_fake_missing_compilers($b2); -is( $b2->have_cplusplus, 0, "have_cplusplus: fake missing c++" ); + my $b2 = ExtUtils::CBuilder->new(quiet => 1); + configure_fake_missing_compilers($b2); + + open(my $orig_err, ">&", \*STDERR) or die "Can't dup STDERR: $!"; + close(STDERR); + my $res = $b2->have_cplusplus; + open(STDERR, ">&", $orig_err) or die "Can't dup \$orig_err $!"; + close($orig_err); + + is($res, 0, "have_cplusplus: fake missing c++" ); } # test found compiler diff --git a/dist/ExtUtils-ParseXS/lib/perlxs.pod b/dist/ExtUtils-ParseXS/lib/perlxs.pod index d77ac1b..e887d33 100644 --- a/dist/ExtUtils-ParseXS/lib/perlxs.pod +++ b/dist/ExtUtils-ParseXS/lib/perlxs.pod @@ -1257,9 +1257,13 @@ to make it a string is recommended if long version numbers are used. The PROTOTYPES: keyword corresponds to B's C<-prototypes> and C<-noprototypes> options. This keyword overrides the command line options. -Prototypes are enabled by default. When prototypes are enabled XSUBs will +Prototypes are disabled by default. When prototypes are enabled, XSUBs will be given Perl prototypes. This keyword may be used multiple times in an XS module to enable and disable prototypes for different parts of the module. +Note that B will nag you if you don't explicitly enable or disable +prototypes, with: + + Please specify prototyping behavior for Foo.xs (see perlxs manual) To enable prototypes: diff --git a/dist/Module-CoreList/Changes b/dist/Module-CoreList/Changes index 134024b..3678b28 100644 --- a/dist/Module-CoreList/Changes +++ b/dist/Module-CoreList/Changes @@ -1,3 +1,9 @@ +5.20151220 + - Updated for v5.23.6 + +5.20151213 + - Updated for v5.22.1 + 5.20151120 - Updated for v5.23.5 diff --git a/dist/Module-CoreList/lib/Module/CoreList.pm b/dist/Module-CoreList/lib/Module/CoreList.pm index fe651bb..485d126 100644 --- a/dist/Module-CoreList/lib/Module/CoreList.pm +++ b/dist/Module-CoreList/lib/Module/CoreList.pm @@ -4,7 +4,7 @@ use vars qw/$VERSION %released %version %families %upstream %bug_tracker %deprecated %delta/; use Module::CoreList::TieHashDelta; use version; -$VERSION = '5.20151120'; +$VERSION = '5.20151220'; sub _released_order { # Sort helper, to make '?' sort after everything else (substr($released{$a}, 0, 1) eq "?") @@ -286,6 +286,8 @@ sub changes_between { 5.023003 => '2015-09-20', 5.023004 => '2015-10-20', 5.023005 => '2015-11-20', + 5.022001 => '2015-12-13', + 5.023006 => '2015-12-21', ); for my $version ( sort { $a <=> $b } keys %released ) { @@ -12002,6 +12004,98 @@ for my $version ( sort { $a <=> $b } keys %released ) { 'version::vpp' => 1, } }, + 5.022001 => { + delta_from => 5.022, + changed => { + 'B::Op_private' => '5.022001', + 'Config' => '5.022001', + 'Module::CoreList' => '5.20151213', + 'Module::CoreList::TieHashDelta'=> '5.20151213', + 'Module::CoreList::Utils'=> '5.20151213', + 'POSIX' => '1.53_01', + 'PerlIO::scalar' => '0.23', + 'Storable' => '2.53_01', + 'Win32' => '0.52', + 'warnings' => '1.34', + }, + removed => { + } + }, + 5.023006 => { + delta_from => 5.023005, + changed => { + 'B::Deparse' => '1.36', + 'B::Op_private' => '5.023006', + 'Benchmark' => '1.21', + 'CPAN::Meta::Requirements'=> '2.140', + 'CPAN::Meta::YAML' => '0.018', + 'Config' => '5.023006', + 'Cwd' => '3.60', + 'Data::Dumper' => '2.159', + 'DynaLoader' => '1.37', + 'File::Spec' => '3.60', + 'File::Spec::AmigaOS' => '3.60', + 'File::Spec::Cygwin' => '3.60', + 'File::Spec::Epoc' => '3.60', + 'File::Spec::Functions' => '3.60', + 'File::Spec::Mac' => '3.60', + 'File::Spec::OS2' => '3.60', + 'File::Spec::Unix' => '3.60', + 'File::Spec::VMS' => '3.60', + 'File::Spec::Win32' => '3.60', + 'Hash::Util::FieldHash' => '1.19', + 'Locale::Codes' => '3.37', + 'Locale::Codes::Constants'=> '3.37', + 'Locale::Codes::Country'=> '3.37', + 'Locale::Codes::Country_Codes'=> '3.37', + 'Locale::Codes::Country_Retired'=> '3.37', + 'Locale::Codes::Currency'=> '3.37', + 'Locale::Codes::Currency_Codes'=> '3.37', + 'Locale::Codes::Currency_Retired'=> '3.37', + 'Locale::Codes::LangExt'=> '3.37', + 'Locale::Codes::LangExt_Codes'=> '3.37', + 'Locale::Codes::LangExt_Retired'=> '3.37', + 'Locale::Codes::LangFam'=> '3.37', + 'Locale::Codes::LangFam_Codes'=> '3.37', + 'Locale::Codes::LangFam_Retired'=> '3.37', + 'Locale::Codes::LangVar'=> '3.37', + 'Locale::Codes::LangVar_Codes'=> '3.37', + 'Locale::Codes::LangVar_Retired'=> '3.37', + 'Locale::Codes::Language'=> '3.37', + 'Locale::Codes::Language_Codes'=> '3.37', + 'Locale::Codes::Language_Retired'=> '3.37', + 'Locale::Codes::Script' => '3.37', + 'Locale::Codes::Script_Codes'=> '3.37', + 'Locale::Codes::Script_Retired'=> '3.37', + 'Locale::Country' => '3.37', + 'Locale::Currency' => '3.37', + 'Locale::Language' => '3.37', + 'Locale::Script' => '3.37', + 'Math::BigInt::FastCalc'=> '0.38', + 'Module::CoreList' => '5.20151220', + 'Module::CoreList::TieHashDelta'=> '5.20151220', + 'Module::CoreList::Utils'=> '5.20151220', + 'Module::Metadata' => '1.000031', + 'Opcode' => '1.34', + 'PerlIO::mmap' => '0.016', + 'Pod::Perldoc' => '3.25_02', + 'SDBM_File' => '1.14', + 'Term::ANSIColor' => '4.04', + 'Test' => '1.28', + 'Unicode::Normalize' => '1.24', + 'XS::APItest' => '0.77', + 'base' => '2.23', + 'encoding::warnings' => '0.12', + 'fields' => '2.23', + 'locale' => '1.08', + 'strict' => '1.10', + 'threads' => '2.05', + 'threads::shared' => '1.50', + 'utf8' => '1.18', + }, + removed => { + } + }, ); sub is_core @@ -12607,6 +12701,20 @@ for my $version (sort { $a <=> $b } keys %delta) { removed => { } }, + 5.022001 => { + delta_from => 5.022, + changed => { + }, + removed => { + } + }, + 5.023006 => { + delta_from => 5.023005, + changed => { + }, + removed => { + } + }, ); for my $version (sort { $a <=> $b } keys %deprecated) { @@ -13019,7 +13127,6 @@ for my $version (sort { $a <=> $b } keys %deprecated) { 'bignum' => 'cpan', 'bigrat' => 'cpan', 'encoding' => 'cpan', - 'encoding::warnings' => 'cpan', 'experimental' => 'cpan', 'ok' => 'cpan', 'parent' => 'cpan', @@ -13068,7 +13175,7 @@ for my $version (sort { $a <=> $b } keys %deprecated) { 'CPAN::Meta::History' => 'https://github.com/Perl-Toolchain-Gang/CPAN-Meta/issues', 'CPAN::Meta::Merge' => 'https://github.com/Perl-Toolchain-Gang/CPAN-Meta/issues', 'CPAN::Meta::Prereqs' => 'https://github.com/Perl-Toolchain-Gang/CPAN-Meta/issues', - 'CPAN::Meta::Requirements'=> 'https://github.com/dagolden/CPAN-Meta-Requirements/issues', + 'CPAN::Meta::Requirements'=> 'https://github.com/Perl-Toolchain-Gang/CPAN-Meta-Requirements/issues', 'CPAN::Meta::Spec' => 'https://github.com/Perl-Toolchain-Gang/CPAN-Meta/issues', 'CPAN::Meta::Validator' => 'https://github.com/Perl-Toolchain-Gang/CPAN-Meta/issues', 'CPAN::Meta::YAML' => 'https://github.com/Perl-Toolchain-Gang/YAML-Tiny/issues', @@ -13279,8 +13386,8 @@ for my $version (sort { $a <=> $b } keys %deprecated) { 'Pod::Escapes' => undef, 'Pod::Find' => undef, 'Pod::InputObjects' => undef, - 'Pod::Man' => undef, - 'Pod::ParseLink' => undef, + 'Pod::Man' => 'https://rt.cpan.org/Public/Dist/Display.html?Name=podlators', + 'Pod::ParseLink' => 'https://rt.cpan.org/Public/Dist/Display.html?Name=podlators', 'Pod::ParseUtils' => undef, 'Pod::Parser' => undef, 'Pod::Perldoc' => undef, @@ -13326,10 +13433,10 @@ for my $version (sort { $a <=> $b } keys %deprecated) { 'Pod::Simple::TranscodeSmart'=> 'https://github.com/perl-pod/pod-simple/issues', 'Pod::Simple::XHTML' => 'https://github.com/perl-pod/pod-simple/issues', 'Pod::Simple::XMLOutStream'=> 'https://github.com/perl-pod/pod-simple/issues', - 'Pod::Text' => undef, - 'Pod::Text::Color' => undef, - 'Pod::Text::Overstrike' => undef, - 'Pod::Text::Termcap' => undef, + 'Pod::Text' => 'https://rt.cpan.org/Public/Dist/Display.html?Name=podlators', + 'Pod::Text::Color' => 'https://rt.cpan.org/Public/Dist/Display.html?Name=podlators', + 'Pod::Text::Overstrike' => 'https://rt.cpan.org/Public/Dist/Display.html?Name=podlators', + 'Pod::Text::Termcap' => 'https://rt.cpan.org/Public/Dist/Display.html?Name=podlators', 'Pod::Usage' => undef, 'Scalar::Util' => undef, 'Socket' => undef, @@ -13379,7 +13486,7 @@ for my $version (sort { $a <=> $b } keys %deprecated) { 'TAP::Parser::SourceHandler::RawTAP'=> 'http://rt.cpan.org/Public/Dist/Display.html?Name=Test-Harness', 'TAP::Parser::YAMLish::Reader'=> 'http://rt.cpan.org/Public/Dist/Display.html?Name=Test-Harness', 'TAP::Parser::YAMLish::Writer'=> 'http://rt.cpan.org/Public/Dist/Display.html?Name=Test-Harness', - 'Term::ANSIColor' => 'https://rt.cpan.org/Public/Dist/Display.html?Name=Term-ANSIColor', + 'Term::ANSIColor' => 'https://rt.cpan.org/Public/Dist/Display.html?Name=Term::ANSIColor', 'Term::Cap' => undef, 'Test::Builder' => 'http://github.com/Test-More/test-more/issues/', 'Test::Builder::IO::Scalar'=> 'http://github.com/Test-More/test-more/issues/', @@ -13428,7 +13535,6 @@ for my $version (sort { $a <=> $b } keys %deprecated) { 'bignum' => undef, 'bigrat' => undef, 'encoding' => undef, - 'encoding::warnings' => undef, 'experimental' => 'http://rt.cpan.org/Public/Dist/Display.html?Name=experimental', 'ok' => 'http://github.com/Test-More/test-more/issues/', 'parent' => undef, diff --git a/dist/Module-CoreList/lib/Module/CoreList/TieHashDelta.pm b/dist/Module-CoreList/lib/Module/CoreList/TieHashDelta.pm index d1f3c95..7781655 100644 --- a/dist/Module-CoreList/lib/Module/CoreList/TieHashDelta.pm +++ b/dist/Module-CoreList/lib/Module/CoreList/TieHashDelta.pm @@ -3,7 +3,7 @@ package Module::CoreList::TieHashDelta; use strict; use vars qw($VERSION); -$VERSION = '5.20151120'; +$VERSION = '5.20151220'; sub TIEHASH { my ($class, $changed, $removed, $parent) = @_; diff --git a/dist/Module-CoreList/lib/Module/CoreList/Utils.pm b/dist/Module-CoreList/lib/Module/CoreList/Utils.pm index 851bf62..f9410dd 100644 --- a/dist/Module-CoreList/lib/Module/CoreList/Utils.pm +++ b/dist/Module-CoreList/lib/Module/CoreList/Utils.pm @@ -6,7 +6,7 @@ use vars qw[$VERSION %utilities]; use Module::CoreList; use Module::CoreList::TieHashDelta; -$VERSION = '5.20151120'; +$VERSION = '5.20151220'; sub utilities { my $perl = shift; @@ -1101,6 +1101,20 @@ my %delta = ( removed => { } }, + 5.022001 => { + delta_from => 5.022, + changed => { + }, + removed => { + } + }, + 5.023006 => { + delta_from => 5.023005, + changed => { + }, + removed => { + } + }, ); for my $version (sort { $a <=> $b } keys %delta) { diff --git a/dist/PathTools/Cwd.pm b/dist/PathTools/Cwd.pm index 70f0eae..64618f9 100644 --- a/dist/PathTools/Cwd.pm +++ b/dist/PathTools/Cwd.pm @@ -3,7 +3,7 @@ use strict; use Exporter; use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); -$VERSION = '3.59'; +$VERSION = '3.60'; my $xs_version = $VERSION; $VERSION =~ tr/_//d; diff --git a/dist/PathTools/lib/File/Spec.pm b/dist/PathTools/lib/File/Spec.pm index 185bd0e..f416908 100644 --- a/dist/PathTools/lib/File/Spec.pm +++ b/dist/PathTools/lib/File/Spec.pm @@ -3,7 +3,7 @@ package File::Spec; use strict; use vars qw(@ISA $VERSION); -$VERSION = '3.59'; +$VERSION = '3.60'; $VERSION =~ tr/_//d; my %module = (MacOS => 'Mac', diff --git a/dist/PathTools/lib/File/Spec/AmigaOS.pm b/dist/PathTools/lib/File/Spec/AmigaOS.pm index 2d8cb27..f979f2f 100644 --- a/dist/PathTools/lib/File/Spec/AmigaOS.pm +++ b/dist/PathTools/lib/File/Spec/AmigaOS.pm @@ -4,7 +4,7 @@ use strict; use vars qw(@ISA $VERSION); require File::Spec::Unix; -$VERSION = '3.59'; +$VERSION = '3.60'; $VERSION =~ tr/_//d; @ISA = qw(File::Spec::Unix); diff --git a/dist/PathTools/lib/File/Spec/Cygwin.pm b/dist/PathTools/lib/File/Spec/Cygwin.pm index d2ff9a0..558a742 100644 --- a/dist/PathTools/lib/File/Spec/Cygwin.pm +++ b/dist/PathTools/lib/File/Spec/Cygwin.pm @@ -4,7 +4,7 @@ use strict; use vars qw(@ISA $VERSION); require File::Spec::Unix; -$VERSION = '3.59'; +$VERSION = '3.60'; $VERSION =~ tr/_//d; @ISA = qw(File::Spec::Unix); diff --git a/dist/PathTools/lib/File/Spec/Epoc.pm b/dist/PathTools/lib/File/Spec/Epoc.pm index b19b9e4..afca637 100644 --- a/dist/PathTools/lib/File/Spec/Epoc.pm +++ b/dist/PathTools/lib/File/Spec/Epoc.pm @@ -3,7 +3,7 @@ package File::Spec::Epoc; use strict; use vars qw($VERSION @ISA); -$VERSION = '3.59'; +$VERSION = '3.60'; $VERSION =~ tr/_//d; require File::Spec::Unix; diff --git a/dist/PathTools/lib/File/Spec/Functions.pm b/dist/PathTools/lib/File/Spec/Functions.pm index e31c31e..276ddcf 100644 --- a/dist/PathTools/lib/File/Spec/Functions.pm +++ b/dist/PathTools/lib/File/Spec/Functions.pm @@ -5,7 +5,7 @@ use strict; use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION); -$VERSION = '3.59'; +$VERSION = '3.60'; $VERSION =~ tr/_//d; require Exporter; diff --git a/dist/PathTools/lib/File/Spec/Mac.pm b/dist/PathTools/lib/File/Spec/Mac.pm index 8cd6a67..4da700c 100644 --- a/dist/PathTools/lib/File/Spec/Mac.pm +++ b/dist/PathTools/lib/File/Spec/Mac.pm @@ -4,7 +4,7 @@ use strict; use vars qw(@ISA $VERSION); require File::Spec::Unix; -$VERSION = '3.59'; +$VERSION = '3.60'; $VERSION =~ tr/_//d; @ISA = qw(File::Spec::Unix); diff --git a/dist/PathTools/lib/File/Spec/OS2.pm b/dist/PathTools/lib/File/Spec/OS2.pm index 188d92e..fad1198 100644 --- a/dist/PathTools/lib/File/Spec/OS2.pm +++ b/dist/PathTools/lib/File/Spec/OS2.pm @@ -4,7 +4,7 @@ use strict; use vars qw(@ISA $VERSION); require File::Spec::Unix; -$VERSION = '3.59'; +$VERSION = '3.60'; $VERSION =~ tr/_//d; @ISA = qw(File::Spec::Unix); diff --git a/dist/PathTools/lib/File/Spec/Unix.pm b/dist/PathTools/lib/File/Spec/Unix.pm index f469763..94e4351 100644 --- a/dist/PathTools/lib/File/Spec/Unix.pm +++ b/dist/PathTools/lib/File/Spec/Unix.pm @@ -3,7 +3,7 @@ package File::Spec::Unix; use strict; use vars qw($VERSION); -$VERSION = '3.59'; +$VERSION = '3.60'; my $xs_version = $VERSION; $VERSION =~ tr/_//d; diff --git a/dist/PathTools/lib/File/Spec/VMS.pm b/dist/PathTools/lib/File/Spec/VMS.pm index 8923c01..b050bf2 100644 --- a/dist/PathTools/lib/File/Spec/VMS.pm +++ b/dist/PathTools/lib/File/Spec/VMS.pm @@ -4,7 +4,7 @@ use strict; use vars qw(@ISA $VERSION); require File::Spec::Unix; -$VERSION = '3.59'; +$VERSION = '3.60'; $VERSION =~ tr/_//d; @ISA = qw(File::Spec::Unix); diff --git a/dist/PathTools/lib/File/Spec/Win32.pm b/dist/PathTools/lib/File/Spec/Win32.pm index a7cf3f9..8839800 100644 --- a/dist/PathTools/lib/File/Spec/Win32.pm +++ b/dist/PathTools/lib/File/Spec/Win32.pm @@ -5,7 +5,7 @@ use strict; use vars qw(@ISA $VERSION); require File::Spec::Unix; -$VERSION = '3.59'; +$VERSION = '3.60'; $VERSION =~ tr/_//d; @ISA = qw(File::Spec::Unix); diff --git a/dist/Test/lib/Test.pm b/dist/Test/lib/Test.pm index cad2bcb..de20922 100644 --- a/dist/Test/lib/Test.pm +++ b/dist/Test/lib/Test.pm @@ -20,7 +20,7 @@ sub _reset_globals { $planned = 0; } -$VERSION = '1.27'; +$VERSION = '1.28'; require Exporter; @ISA=('Exporter'); @@ -461,23 +461,24 @@ sub _complain { my $diag = $$detail{diagnostic}; $diag =~ s/\n/\n#/g if defined $diag; + my $out = $$detail{todo} ? $TESTOUT : $TESTERR; $$detail{context} .= ' *TODO*' if $$detail{todo}; if (!$$detail{compare}) { if (!$diag) { - print $TESTERR "# Failed test $ntest in $$detail{context}\n"; + print $out "# Failed test $ntest in $$detail{context}\n"; } else { - print $TESTERR "# Failed test $ntest in $$detail{context}: $diag\n"; + print $out "# Failed test $ntest in $$detail{context}: $diag\n"; } } else { my $prefix = "Test $ntest"; - print $TESTERR "# $prefix got: " . _quote($result) . + print $out "# $prefix got: " . _quote($result) . " ($$detail{context})\n"; $prefix = ' ' x (length($prefix) - 5); my $expected_quoted = (defined $$detail{regex}) ? 'qr{'.($$detail{regex}).'}' : _quote($expected); - print $TESTERR "# $prefix Expected: $expected_quoted", + print $out "# $prefix Expected: $expected_quoted", $diag ? " ($diag)" : (), "\n"; _diff_complain( $result, $expected, $detail, $prefix ) @@ -485,7 +486,7 @@ sub _complain { } if(defined $Program_Lines{ $$detail{file} }[ $$detail{line} ]) { - print $TESTERR + print $out "# $$detail{file} line $$detail{line} is: $Program_Lines{ $$detail{file} }[ $$detail{line} ]\n" if $Program_Lines{ $$detail{file} }[ $$detail{line} ] =~ m/[^\s\#\(\)\{\}\[\]\;]/; # Otherwise it's uninformative diff --git a/dist/base/Changes b/dist/base/Changes index c4cbb18..44d579f 100644 --- a/dist/base/Changes +++ b/dist/base/Changes @@ -1,3 +1,6 @@ +2.23 + - no changes since 2.22_01 + 2.22_01 - require perl v5.8.0; tests for [perl #121196] break on 5.6.1, and I (rjbs) am not comfortable spending time determining whether the fault diff --git a/dist/base/lib/base.pm b/dist/base/lib/base.pm index 7eff8b2..6fee600 100644 --- a/dist/base/lib/base.pm +++ b/dist/base/lib/base.pm @@ -3,7 +3,7 @@ package base; use strict 'vars'; use vars qw($VERSION); -$VERSION = '2.22_01'; +$VERSION = '2.23'; $VERSION =~ tr/_//d; # constant.pm is slow diff --git a/dist/base/lib/fields.pm b/dist/base/lib/fields.pm index 630f8ce..c40978b 100644 --- a/dist/base/lib/fields.pm +++ b/dist/base/lib/fields.pm @@ -12,7 +12,7 @@ unless( eval q{require warnings::register; warnings::register->import; 1} ) { } use vars qw(%attr $VERSION); -$VERSION = '2.22_01'; +$VERSION = '2.23'; $VERSION =~ tr/_//d; # constant.pm is slow diff --git a/cpan/encoding-warnings/lib/encoding/warnings.pm b/dist/encoding-warnings/lib/encoding/warnings.pm similarity index 98% rename from cpan/encoding-warnings/lib/encoding/warnings.pm rename to dist/encoding-warnings/lib/encoding/warnings.pm index 5e6aec0..d5c4184 100644 --- a/cpan/encoding-warnings/lib/encoding/warnings.pm +++ b/dist/encoding-warnings/lib/encoding/warnings.pm @@ -1,5 +1,5 @@ package encoding::warnings; -$encoding::warnings::VERSION = '0.11'; +$encoding::warnings::VERSION = '0.12'; use strict; use 5.007; @@ -170,7 +170,9 @@ sub import { ], $class, ); + no warnings 'deprecated'; ${^ENCODING} = $decoder; + use warnings 'deprecated'; $^H{$class} = 1; } diff --git a/cpan/encoding-warnings/t/1-warning.t b/dist/encoding-warnings/t/1-warning.t similarity index 85% rename from cpan/encoding-warnings/t/1-warning.t rename to dist/encoding-warnings/t/1-warning.t index c7525ae..9b04cb6 100644 --- a/cpan/encoding-warnings/t/1-warning.t +++ b/dist/encoding-warnings/t/1-warning.t @@ -3,6 +3,10 @@ # $Revision: #5 $ $Change: 6145 $ $DateTime: 2004-07-16T03:49:06.717424Z $ BEGIN { + if (ord("A") != 65) { + print "1..0 # Skip: Encode not working on EBCDIC\n"; + exit 0; + } unless (eval { require Encode } ) { print "1..0 # Skip: no Encode\n"; exit 0; diff --git a/cpan/encoding-warnings/t/2-fatal.t b/dist/encoding-warnings/t/2-fatal.t similarity index 85% rename from cpan/encoding-warnings/t/2-fatal.t rename to dist/encoding-warnings/t/2-fatal.t index 4fc16a1..ef00181 100644 --- a/cpan/encoding-warnings/t/2-fatal.t +++ b/dist/encoding-warnings/t/2-fatal.t @@ -3,6 +3,10 @@ # $Revision: #4 $ $Change: 1626 $ $DateTime: 2004-03-14T16:53:19.351256Z $ BEGIN { + if (ord("A") != 65) { + print "1..0 # Skip: Encode not working on EBCDIC\n"; + exit 0; + } unless (eval { require Encode } ) { print "1..0 # Skip: no Encode\n"; exit 0; diff --git a/cpan/encoding-warnings/t/3-normal.t b/dist/encoding-warnings/t/3-normal.t similarity index 100% rename from cpan/encoding-warnings/t/3-normal.t rename to dist/encoding-warnings/t/3-normal.t diff --git a/cpan/encoding-warnings/t/4-lexical.t b/dist/encoding-warnings/t/4-lexical.t similarity index 88% rename from cpan/encoding-warnings/t/4-lexical.t rename to dist/encoding-warnings/t/4-lexical.t index e80c504..bdd9f21 100644 --- a/cpan/encoding-warnings/t/4-lexical.t +++ b/dist/encoding-warnings/t/4-lexical.t @@ -1,6 +1,10 @@ use strict; use Test; BEGIN { + if (ord("A") != 65) { + print "1..0 # Skip: Encode not working on EBCDIC\n"; + exit 0; + } use Config; if ($Config::Config{'extensions'} !~ /\bEncode\b/) { print "1..0 # Skip: Encode was not built\n"; diff --git a/dist/threads-shared/lib/threads/shared.pm b/dist/threads-shared/lib/threads/shared.pm index 8b0c950..dc76ab2 100644 --- a/dist/threads-shared/lib/threads/shared.pm +++ b/dist/threads-shared/lib/threads/shared.pm @@ -7,7 +7,7 @@ use warnings; use Scalar::Util qw(reftype refaddr blessed); -our $VERSION = '1.49'; # Please update the pod, too. +our $VERSION = '1.50'; # Please update the pod, too. my $XS_VERSION = $VERSION; $VERSION = eval $VERSION; diff --git a/dist/threads-shared/shared.xs b/dist/threads-shared/shared.xs index 66dadec..e323788 100644 --- a/dist/threads-shared/shared.xs +++ b/dist/threads-shared/shared.xs @@ -1166,6 +1166,8 @@ const MGVTBL sharedsv_array_vtbl = { }; +#if 0 +/* XXX unused dead code */ /* Recursively unlocks a shared sv. */ static void @@ -1175,6 +1177,7 @@ Perl_sharedsv_unlock(pTHX_ SV *ssv) assert(ul); recursive_lock_release(aTHX_ &ul->lock); } +#endif /* Recursive locks on a sharedsv. diff --git a/dist/threads/lib/threads.pm b/dist/threads/lib/threads.pm index 0f6bca4..2e4b859 100644 --- a/dist/threads/lib/threads.pm +++ b/dist/threads/lib/threads.pm @@ -5,7 +5,7 @@ use 5.008; use strict; use warnings; -our $VERSION = '2.04'; +our $VERSION = '2.05'; my $XS_VERSION = $VERSION; $VERSION = eval $VERSION; diff --git a/dist/threads/threads.xs b/dist/threads/threads.xs index 366877e..82a59bb 100644 --- a/dist/threads/threads.xs +++ b/dist/threads/threads.xs @@ -264,6 +264,7 @@ S_ithread_clear(pTHX_ ithread *thread) */ STATIC void S_ithread_free(pTHX_ ithread *thread) + PERL_TSA_RELEASE(thread->mutex) { #ifdef WIN32 HANDLE handle; @@ -326,6 +327,7 @@ S_ithread_free(pTHX_ ithread *thread) static void S_ithread_count_inc(pTHX_ ithread *thread) + PERL_TSA_EXCLUDES(thread->mutex) { MUTEX_LOCK(&thread->mutex); thread->count++; @@ -715,17 +717,20 @@ S_SV_to_ithread(pTHX_ SV *sv) /* threads->create() * Called in context of parent thread. - * Called with MY_POOL.create_destruct_mutex locked. (Unlocked on error.) + * Called with my_pool->create_destruct_mutex locked. + * (Unlocked both on error and on success.) */ STATIC ithread * S_ithread_create( PerlInterpreter *parent_perl, + my_pool_t *my_pool, SV *init_function, IV stack_size, int gimme, int exit_opt, int params_start, int num_params) + PERL_TSA_RELEASE(my_pool->create_destruct_mutex) { dTHXa(parent_perl); ithread *thread; @@ -741,18 +746,17 @@ S_ithread_create( int rc_stack_size = 0; int rc_thread_create = 0; #endif - dMY_POOL; /* Allocate thread structure in context of the main thread's interpreter */ { - PERL_SET_CONTEXT(MY_POOL.main_thread.interp); + PERL_SET_CONTEXT(my_pool->main_thread.interp); thread = (ithread *)PerlMemShared_malloc(sizeof(ithread)); } PERL_SET_CONTEXT(aTHX); if (!thread) { /* This lock was acquired in ithread_create() * prior to calling S_ithread_create(). */ - MUTEX_UNLOCK(&MY_POOL.create_destruct_mutex); + MUTEX_UNLOCK(&my_pool->create_destruct_mutex); { int fd = PerlIO_fileno(Perl_error_log); if (fd >= 0) { @@ -765,11 +769,11 @@ S_ithread_create( Zero(thread, 1, ithread); /* Add to threads list */ - thread->next = &MY_POOL.main_thread; - thread->prev = MY_POOL.main_thread.prev; - MY_POOL.main_thread.prev = thread; + thread->next = &my_pool->main_thread; + thread->prev = my_pool->main_thread.prev; + my_pool->main_thread.prev = thread; thread->prev->next = thread; - MY_POOL.total_threads++; + my_pool->total_threads++; /* 1 ref to be held by the local var 'thread' in S_ithread_run(). * 1 ref to be held by the threads object that we assume we will @@ -785,7 +789,7 @@ S_ithread_create( MUTEX_INIT(&thread->mutex); MUTEX_LOCK(&thread->mutex); /* See S_ithread_run() for more detail. */ - thread->tid = MY_POOL.tid_counter++; + thread->tid = my_pool->tid_counter++; thread->stack_size = S_good_stack_size(aTHX_ stack_size); thread->gimme = gimme; thread->state = exit_opt; @@ -995,7 +999,7 @@ S_ithread_create( /* Must unlock mutex for destruct call */ /* This lock was acquired in ithread_create() * prior to calling S_ithread_create(). */ - MUTEX_UNLOCK(&MY_POOL.create_destruct_mutex); + MUTEX_UNLOCK(&my_pool->create_destruct_mutex); thread->state |= PERL_ITHR_NONVIABLE; S_ithread_free(aTHX_ thread); /* Releases MUTEX */ #ifndef WIN32 @@ -1010,9 +1014,13 @@ S_ithread_create( return (NULL); } - MY_POOL.running_threads++; + my_pool->running_threads++; + MUTEX_UNLOCK(&my_pool->create_destruct_mutex); return (thread); +CLANG_DIAG_IGNORE(-Wthread-safety); +/* warning: mutex 'thread->mutex' is not held on every path through here [-Wthread-safety-analysis] */ } +CLANG_DIAG_RESTORE; #endif /* USE_ITHREADS */ @@ -1136,7 +1144,8 @@ ithread_create(...) /* Create thread */ MUTEX_LOCK(&MY_POOL.create_destruct_mutex); - thread = S_ithread_create(aTHX_ function_to_call, + thread = S_ithread_create(aTHX_ &MY_POOL, + function_to_call, stack_size, context, exit_opt, @@ -1146,11 +1155,13 @@ ithread_create(...) XSRETURN_UNDEF; /* Mutex already unlocked */ } ST(0) = sv_2mortal(S_ithread_to_SV(aTHX_ Nullsv, thread, classname, FALSE)); - MUTEX_UNLOCK(&MY_POOL.create_destruct_mutex); /* Let thread run. */ /* See S_ithread_run() for more detail. */ + CLANG_DIAG_IGNORE(-Wthread-safety); + /* warning: releasing mutex 'thread->mutex' that was not held [-Wthread-safety-analysis] */ MUTEX_UNLOCK(&thread->mutex); + CLANG_DIAG_RESTORE; /* XSRETURN(1); - implied */ diff --git a/doop.c b/doop.c index 5dbd8a2..dbf26d6 100644 --- a/doop.c +++ b/doop.c @@ -1002,6 +1002,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) const char *rsave; bool left_utf; bool right_utf; + bool do_warn_above_ff = ckWARN_d(WARN_DEPRECATED); STRLEN needlen = 0; PERL_ARGS_ASSERT_DO_VOP; @@ -1017,7 +1018,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) } rsave = rc = SvPV_nomg_const(right, rightlen); - /* This need to come after SvPV to ensure that string overloading has + /* This needs to come after SvPV to ensure that string overloading has fired off. */ left_utf = DO_UTF8(left); @@ -1082,6 +1083,12 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) rulen -= ulen; duc = luc & ruc; dc = (char*)uvchr_to_utf8((U8*)dc, duc); + if (do_warn_above_ff && (luc > 0xff || ruc > 0xff)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + deprecated_above_ff_msg, PL_op_desc[optype]); + /* Warn only once per operation */ + do_warn_above_ff = FALSE; + } } if (sv == left || sv == right) (void)sv_usepvn(sv, dcorig, needlen); @@ -1097,6 +1104,11 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) rulen -= ulen; duc = luc ^ ruc; dc = (char*)uvchr_to_utf8((U8*)dc, duc); + if (do_warn_above_ff && (luc > 0xff || ruc > 0xff)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + deprecated_above_ff_msg, PL_op_desc[optype]); + do_warn_above_ff = FALSE; + } } goto mop_up_utf; case OP_BIT_OR: @@ -1109,6 +1121,11 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) rulen -= ulen; duc = luc | ruc; dc = (char*)uvchr_to_utf8((U8*)dc, duc); + if (do_warn_above_ff && (luc > 0xff || ruc > 0xff)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + deprecated_above_ff_msg, PL_op_desc[optype]); + do_warn_above_ff = FALSE; + } } mop_up_utf: if (rulen) diff --git a/dquote.c b/dquote.c index 42864d4..895f17d 100644 --- a/dquote.c +++ b/dquote.c @@ -158,10 +158,6 @@ Perl_grok_bslash_o(pTHX_ char **s, UV *uv, const char** error_msg, /* Return past the '}' */ *s = e + 1; - /* guarantee replacing "\o{...}" with utf8 bytes fits within - * existing space */ - assert(UVCHR_SKIP(*uv) < *s - start); - return TRUE; } diff --git a/dquote_inline.h b/dquote_inline.h index 02c4f1d..050b14f 100644 --- a/dquote_inline.h +++ b/dquote_inline.h @@ -101,7 +101,7 @@ S_grok_bslash_x(pTHX_ char **s, UV *uv, const char** error_msg, } return FALSE; } - goto ok; + return TRUE; } e = strchr(*s, '}'); @@ -128,7 +128,7 @@ S_grok_bslash_x(pTHX_ char **s, UV *uv, const char** error_msg, } *s = e + 1; *uv = 0; - goto ok; + return TRUE; } flags |= PERL_SCAN_ALLOW_UNDERSCORES; @@ -150,10 +150,6 @@ S_grok_bslash_x(pTHX_ char **s, UV *uv, const char** error_msg, /* Return past the '}' */ *s = e + 1; - ok: - /* guarantee replacing "\x{...}" with utf8 bytes fits within - * existing space */ - assert(UVCHR_SKIP(*uv) < *s - start); return TRUE; } diff --git a/ebcdic_tables.h b/ebcdic_tables.h index 344a990..5344d39 100644 --- a/ebcdic_tables.h +++ b/ebcdic_tables.h @@ -18,84 +18,92 @@ /* Index is ASCII platform code point; value is EBCDIC 1047 equivalent */ EXTCONST U8 PL_a2e[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 21, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, - 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111, - 124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214, - 215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 173, 224, 189, 95, 109, - 121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150, - 151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 192, 79, 208, 161, 7, - 32, 33, 34, 35, 36, 37, 6, 23, 40, 41, 42, 43, 44, 9, 10, 27, - 48, 49, 26, 51, 52, 53, 54, 8, 56, 57, 58, 59, 4, 20, 62, 255, - 65, 170, 74, 177, 159, 178, 106, 181, 187, 180, 154, 138, 176, 202, 175, 188, - 144, 143, 234, 250, 190, 160, 182, 179, 157, 218, 155, 139, 183, 184, 185, 171, - 100, 101, 98, 102, 99, 103, 158, 104, 116, 113, 114, 115, 120, 117, 118, 119, - 172, 105, 237, 238, 235, 239, 236, 191, 128, 253, 254, 251, 252, 186, 174, 89, - 68, 69, 66, 70, 67, 71, 156, 72, 84, 81, 82, 83, 88, 85, 86, 87, - 140, 73, 205, 206, 203, 207, 204, 225, 112, 221, 222, 219, 220, 141, 142, 223 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, +0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, +0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, +0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAD,0xE0,0xBD,0x5F,0x6D, +0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, +0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, +0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, +0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, +0x41,0xAA,0x4A,0xB1,0x9F,0xB2,0x6A,0xB5,0xBB,0xB4,0x9A,0x8A,0xB0,0xCA,0xAF,0xBC, +0x90,0x8F,0xEA,0xFA,0xBE,0xA0,0xB6,0xB3,0x9D,0xDA,0x9B,0x8B,0xB7,0xB8,0xB9,0xAB, +0x64,0x65,0x62,0x66,0x63,0x67,0x9E,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77, +0xAC,0x69,0xED,0xEE,0xEB,0xEF,0xEC,0xBF,0x80,0xFD,0xFE,0xFB,0xFC,0xBA,0xAE,0x59, +0x44,0x45,0x42,0x46,0x43,0x47,0x9C,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57, +0x8C,0x49,0xCD,0xCE,0xCB,0xCF,0xCC,0xE1,0x70,0xDD,0xDE,0xDB,0xDC,0x8D,0x8E,0xDF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 1047 code point; value is ASCII platform equivalent */ EXTCONST U8 PL_e2a[] = { - 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 157, 10, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, - 128, 129, 130, 131, 132, 133, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, - 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, - 32, 160, 226, 228, 224, 225, 227, 229, 231, 241, 162, 46, 60, 40, 43, 124, - 38, 233, 234, 235, 232, 237, 238, 239, 236, 223, 33, 36, 42, 41, 59, 94, - 45, 47, 194, 196, 192, 193, 195, 197, 199, 209, 166, 44, 37, 95, 62, 63, - 248, 201, 202, 203, 200, 205, 206, 207, 204, 96, 58, 35, 64, 39, 61, 34, - 216, 97, 98, 99, 100, 101, 102, 103, 104, 105, 171, 187, 240, 253, 254, 177, - 176, 106, 107, 108, 109, 110, 111, 112, 113, 114, 170, 186, 230, 184, 198, 164, - 181, 126, 115, 116, 117, 118, 119, 120, 121, 122, 161, 191, 208, 91, 222, 174, - 172, 163, 165, 183, 169, 167, 182, 188, 189, 190, 221, 168, 175, 93, 180, 215, - 123, 65, 66, 67, 68, 69, 70, 71, 72, 73, 173, 244, 246, 242, 243, 245, - 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, 185, 251, 252, 249, 250, 255, - 92, 247, 83, 84, 85, 86, 87, 88, 89, 90, 178, 212, 214, 210, 211, 213, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 179, 219, 220, 217, 218, 159 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, +0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, +0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, +0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, +0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E, +0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, +0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, +0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, +0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, +0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE, +0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7, +0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, +0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, +0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* (Confusingly named) Index is EBCDIC 1047 I8 byte; value is * EBCDIC 1047 UTF-EBCDIC equivalent */ EXTCONST U8 PL_utf2e[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 21, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, - 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111, - 124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214, - 215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 173, 224, 189, 95, 109, - 121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150, - 151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 192, 79, 208, 161, 7, - 32, 33, 34, 35, 36, 37, 6, 23, 40, 41, 42, 43, 44, 9, 10, 27, - 48, 49, 26, 51, 52, 53, 54, 8, 56, 57, 58, 59, 4, 20, 62, 255, - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 81, 82, 83, 84, 85, 86, - 87, 88, 89, 98, 99, 100, 101, 102, 103, 104, 105, 106, 112, 113, 114, 115, - 116, 117, 118, 119, 120, 128, 138, 139, 140, 141, 142, 143, 144, 154, 155, 156, - 157, 158, 159, 160, 170, 171, 172, 174, 175, 176, 177, 178, 179, 180, 181, 182, - 183, 184, 185, 186, 187, 188, 190, 191, 202, 203, 204, 205, 206, 207, 218, 219, - 220, 221, 222, 223, 225, 234, 235, 236, 237, 238, 239, 250, 251, 252, 253, 254 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, +0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, +0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, +0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAD,0xE0,0xBD,0x5F,0x6D, +0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, +0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, +0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, +0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, +0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x51,0x52,0x53,0x54,0x55,0x56, +0x57,0x58,0x59,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x70,0x71,0x72,0x73, +0x74,0x75,0x76,0x77,0x78,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B,0x9C, +0x9D,0x9E,0x9F,0xA0,0xAA,0xAB,0xAC,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6, +0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, +0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* (Confusingly named) Index is EBCDIC 1047 UTF-EBCDIC byte; value is * EBCDIC 1047 I8 equivalent */ EXTCONST U8 PL_e2utf[] = { - 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 157, 10, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, - 128, 129, 130, 131, 132, 133, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, - 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, - 32, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 46, 60, 40, 43, 124, - 38, 170, 171, 172, 173, 174, 175, 176, 177, 178, 33, 36, 42, 41, 59, 94, - 45, 47, 179, 180, 181, 182, 183, 184, 185, 186, 187, 44, 37, 95, 62, 63, - 188, 189, 190, 191, 192, 193, 194, 195, 196, 96, 58, 35, 64, 39, 61, 34, - 197, 97, 98, 99, 100, 101, 102, 103, 104, 105, 198, 199, 200, 201, 202, 203, - 204, 106, 107, 108, 109, 110, 111, 112, 113, 114, 205, 206, 207, 208, 209, 210, - 211, 126, 115, 116, 117, 118, 119, 120, 121, 122, 212, 213, 214, 91, 215, 216, - 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 93, 230, 231, - 123, 65, 66, 67, 68, 69, 70, 71, 72, 73, 232, 233, 234, 235, 236, 237, - 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, 238, 239, 240, 241, 242, 243, - 92, 244, 83, 84, 85, 86, 87, 88, 89, 90, 245, 246, 247, 248, 249, 250, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 251, 252, 253, 254, 255, 159 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, +0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, +0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, +0x20,0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0x2E,0x3C,0x28,0x2B,0x7C, +0x26,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB0,0xB1,0xB2,0x21,0x24,0x2A,0x29,0x3B,0x5E, +0x2D,0x2F,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0x2C,0x25,0x5F,0x3E,0x3F, +0xBC,0xBD,0xBE,0xBF,0xC0,0xC1,0xC2,0xC3,0xC4,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, +0xC5,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB, +0xCC,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xCD,0xCE,0xCF,0xD0,0xD1,0xD2, +0xD3,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xD4,0xD5,0xD6,0x5B,0xD7,0xD8, +0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0x5D,0xE6,0xE7, +0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, +0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xEE,0xEF,0xF0,0xF1,0xF2,0xF3, +0x5C,0xF4,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xFB,0xFC,0xFD,0xFE,0xFF,0x9F +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 1047 UTF-EBCDIC byte; value is UTF8SKIP for start bytes; @@ -118,92 +126,100 @@ EXTCONST U8 PL_utf8skip[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 7, 1 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 14, 1 }; /* Index is EBCDIC 1047 code point; value is its lowercase equivalent */ EXTCONST U8 PL_latin1_lc[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 66, 67, 68, 69, 70, 71, 72, 73, 106, 107, 108, 109, 110, 111, - 112, 81, 82, 83, 84, 85, 86, 87, 88, 121, 122, 123, 124, 125, 126, 127, - 112, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, - 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 156, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 140, 173, 142, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 141, 187, 188, 189, 190, 191, - 192, 129, 130, 131, 132, 133, 134, 135, 136, 137, 202, 203, 204, 205, 206, 207, - 208, 145, 146, 147, 148, 149, 150, 151, 152, 153, 218, 219, 220, 221, 222, 223, - 224, 225, 162, 163, 164, 165, 166, 167, 168, 169, 234, 203, 204, 205, 206, 207, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 219, 220, 221, 222, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x70,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, +0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9C,0x9F, +0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0x8C,0xAD,0x8E,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0x8D,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, +0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 1047 code point; value is its uppercase equivalent. * The 'mod' in the name means that codepoints whose uppercase is above 255 or * longer than 1 character map to LATIN SMALL LETTER Y WITH DIARESIS */ EXTCONST U8 PL_mod_latin1_uc[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 98, 99, 100, 101, 102, 103, 104, 105, 74, 75, 76, 77, 78, 79, - 80, 113, 114, 115, 116, 117, 118, 119, 120, 223, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 128, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 172, 186, 174, 143, - 144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 158, 157, 158, 159, - 223, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 235, 236, 237, 238, 239, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 251, 252, 253, 254, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0xDF,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x80,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xBA,0xAE,0x8F, +0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9E,0x9F, +0xDF,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, +0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xFB,0xFC,0xFD,0xFE,0xDF, +0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 1047 code point; For A-Z, value is a-z; for a-z, value * is A-Z; all other code points map to themselves */ EXTCONST U8 PL_fold[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 140, 141, 142, 143, - 144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 156, 157, 158, 159, - 160, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 129, 130, 131, 132, 133, 134, 135, 136, 137, 202, 203, 204, 205, 206, 207, - 208, 145, 146, 147, 148, 149, 150, 151, 152, 153, 218, 219, 220, 221, 222, 223, - 224, 225, 162, 163, 164, 165, 166, 167, 168, 169, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, +0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, +0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, +0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 1047 code point; value is its other fold-pair equivalent * (A => a; a => A, etc) in the 0-255 range. If no such equivalent, value is * the code point itself */ EXTCONST U8 PL_fold_latin1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 98, 99, 100, 101, 102, 103, 104, 105, 74, 75, 76, 77, 78, 79, - 80, 113, 114, 115, 116, 117, 118, 119, 120, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 66, 67, 68, 69, 70, 71, 72, 73, 106, 107, 108, 109, 110, 111, - 128, 81, 82, 83, 84, 85, 86, 87, 88, 121, 122, 123, 124, 125, 126, 127, - 112, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 172, 186, 174, 143, - 144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 158, 157, 156, 159, - 160, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 140, 173, 142, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 141, 187, 188, 189, 190, 191, - 192, 129, 130, 131, 132, 133, 134, 135, 136, 137, 202, 235, 236, 237, 238, 239, - 208, 145, 146, 147, 148, 149, 150, 151, 152, 153, 218, 251, 252, 253, 254, 223, - 224, 225, 162, 163, 164, 165, 166, 167, 168, 169, 234, 203, 204, 205, 206, 207, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 219, 220, 221, 222, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x80,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x70,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xBA,0xAE,0x8F, +0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9C,0x9F, +0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0x8C,0xAD,0x8E,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0x8D,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, +0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xFB,0xFC,0xFD,0xFE,0xDF, +0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; #endif /* EBCDIC 1047 */ @@ -215,84 +231,92 @@ EXTCONST U8 PL_fold_latin1[] = { /* Index is ASCII platform code point; value is EBCDIC 037 equivalent */ EXTCONST U8 PL_a2e[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, - 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111, - 124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214, - 215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 186, 224, 187, 176, 109, - 121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150, - 151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 192, 79, 208, 161, 7, - 32, 33, 34, 35, 36, 21, 6, 23, 40, 41, 42, 43, 44, 9, 10, 27, - 48, 49, 26, 51, 52, 53, 54, 8, 56, 57, 58, 59, 4, 20, 62, 255, - 65, 170, 74, 177, 159, 178, 106, 181, 189, 180, 154, 138, 95, 202, 175, 188, - 144, 143, 234, 250, 190, 160, 182, 179, 157, 218, 155, 139, 183, 184, 185, 171, - 100, 101, 98, 102, 99, 103, 158, 104, 116, 113, 114, 115, 120, 117, 118, 119, - 172, 105, 237, 238, 235, 239, 236, 191, 128, 253, 254, 251, 252, 173, 174, 89, - 68, 69, 66, 70, 67, 71, 156, 72, 84, 81, 82, 83, 88, 85, 86, 87, - 140, 73, 205, 206, 203, 207, 204, 225, 112, 221, 222, 219, 220, 141, 142, 223 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x25,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, +0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, +0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, +0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xBA,0xE0,0xBB,0xB0,0x6D, +0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, +0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, +0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, +0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, +0x41,0xAA,0x4A,0xB1,0x9F,0xB2,0x6A,0xB5,0xBD,0xB4,0x9A,0x8A,0x5F,0xCA,0xAF,0xBC, +0x90,0x8F,0xEA,0xFA,0xBE,0xA0,0xB6,0xB3,0x9D,0xDA,0x9B,0x8B,0xB7,0xB8,0xB9,0xAB, +0x64,0x65,0x62,0x66,0x63,0x67,0x9E,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77, +0xAC,0x69,0xED,0xEE,0xEB,0xEF,0xEC,0xBF,0x80,0xFD,0xFE,0xFB,0xFC,0xAD,0xAE,0x59, +0x44,0x45,0x42,0x46,0x43,0x47,0x9C,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57, +0x8C,0x49,0xCD,0xCE,0xCB,0xCF,0xCC,0xE1,0x70,0xDD,0xDE,0xDB,0xDC,0x8D,0x8E,0xDF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 037 code point; value is ASCII platform equivalent */ EXTCONST U8 PL_e2a[] = { - 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, - 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, - 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, - 32, 160, 226, 228, 224, 225, 227, 229, 231, 241, 162, 46, 60, 40, 43, 124, - 38, 233, 234, 235, 232, 237, 238, 239, 236, 223, 33, 36, 42, 41, 59, 172, - 45, 47, 194, 196, 192, 193, 195, 197, 199, 209, 166, 44, 37, 95, 62, 63, - 248, 201, 202, 203, 200, 205, 206, 207, 204, 96, 58, 35, 64, 39, 61, 34, - 216, 97, 98, 99, 100, 101, 102, 103, 104, 105, 171, 187, 240, 253, 254, 177, - 176, 106, 107, 108, 109, 110, 111, 112, 113, 114, 170, 186, 230, 184, 198, 164, - 181, 126, 115, 116, 117, 118, 119, 120, 121, 122, 161, 191, 208, 221, 222, 174, - 94, 163, 165, 183, 169, 167, 182, 188, 189, 190, 91, 93, 175, 168, 180, 215, - 123, 65, 66, 67, 68, 69, 70, 71, 72, 73, 173, 244, 246, 242, 243, 245, - 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, 185, 251, 252, 249, 250, 255, - 92, 247, 83, 84, 85, 86, 87, 88, 89, 90, 178, 212, 214, 210, 211, 213, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 179, 219, 220, 217, 218, 159 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x9D,0x85,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, +0x80,0x81,0x82,0x83,0x84,0x0A,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, +0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, +0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, +0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0xAC, +0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, +0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, +0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, +0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, +0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0xDD,0xDE,0xAE, +0x5E,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0x5B,0x5D,0xAF,0xA8,0xB4,0xD7, +0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, +0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, +0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* (Confusingly named) Index is EBCDIC 037 I8 byte; value is * EBCDIC 037 UTF-EBCDIC equivalent */ EXTCONST U8 PL_utf2e[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, - 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111, - 124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214, - 215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 186, 224, 187, 176, 109, - 121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150, - 151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 192, 79, 208, 161, 7, - 32, 33, 34, 35, 36, 21, 6, 23, 40, 41, 42, 43, 44, 9, 10, 27, - 48, 49, 26, 51, 52, 53, 54, 8, 56, 57, 58, 59, 4, 20, 62, 255, - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 81, 82, 83, 84, 85, 86, - 87, 88, 89, 95, 98, 99, 100, 101, 102, 103, 104, 105, 106, 112, 113, 114, - 115, 116, 117, 118, 119, 120, 128, 138, 139, 140, 141, 142, 143, 144, 154, 155, - 156, 157, 158, 159, 160, 170, 171, 172, 173, 174, 175, 177, 178, 179, 180, 181, - 182, 183, 184, 185, 188, 189, 190, 191, 202, 203, 204, 205, 206, 207, 218, 219, - 220, 221, 222, 223, 225, 234, 235, 236, 237, 238, 239, 250, 251, 252, 253, 254 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x25,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, +0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, +0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, +0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xBA,0xE0,0xBB,0xB0,0x6D, +0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, +0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, +0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, +0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, +0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x51,0x52,0x53,0x54,0x55,0x56, +0x57,0x58,0x59,0x5F,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x70,0x71,0x72, +0x73,0x74,0x75,0x76,0x77,0x78,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B, +0x9C,0x9D,0x9E,0x9F,0xA0,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB1,0xB2,0xB3,0xB4,0xB5, +0xB6,0xB7,0xB8,0xB9,0xBC,0xBD,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, +0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* (Confusingly named) Index is EBCDIC 037 UTF-EBCDIC byte; value is * EBCDIC 037 I8 equivalent */ EXTCONST U8 PL_e2utf[] = { - 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, - 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, - 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, - 32, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 46, 60, 40, 43, 124, - 38, 170, 171, 172, 173, 174, 175, 176, 177, 178, 33, 36, 42, 41, 59, 179, - 45, 47, 180, 181, 182, 183, 184, 185, 186, 187, 188, 44, 37, 95, 62, 63, - 189, 190, 191, 192, 193, 194, 195, 196, 197, 96, 58, 35, 64, 39, 61, 34, - 198, 97, 98, 99, 100, 101, 102, 103, 104, 105, 199, 200, 201, 202, 203, 204, - 205, 106, 107, 108, 109, 110, 111, 112, 113, 114, 206, 207, 208, 209, 210, 211, - 212, 126, 115, 116, 117, 118, 119, 120, 121, 122, 213, 214, 215, 216, 217, 218, - 94, 219, 220, 221, 222, 223, 224, 225, 226, 227, 91, 93, 228, 229, 230, 231, - 123, 65, 66, 67, 68, 69, 70, 71, 72, 73, 232, 233, 234, 235, 236, 237, - 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, 238, 239, 240, 241, 242, 243, - 92, 244, 83, 84, 85, 86, 87, 88, 89, 90, 245, 246, 247, 248, 249, 250, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 251, 252, 253, 254, 255, 159 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x9D,0x85,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, +0x80,0x81,0x82,0x83,0x84,0x0A,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, +0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, +0x20,0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0x2E,0x3C,0x28,0x2B,0x7C, +0x26,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB0,0xB1,0xB2,0x21,0x24,0x2A,0x29,0x3B,0xB3, +0x2D,0x2F,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0x2C,0x25,0x5F,0x3E,0x3F, +0xBD,0xBE,0xBF,0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, +0xC6,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC, +0xCD,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xCE,0xCF,0xD0,0xD1,0xD2,0xD3, +0xD4,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA, +0x5E,0xDB,0xDC,0xDD,0xDE,0xDF,0xE0,0xE1,0xE2,0xE3,0x5B,0x5D,0xE4,0xE5,0xE6,0xE7, +0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, +0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xEE,0xEF,0xF0,0xF1,0xF2,0xF3, +0x5C,0xF4,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xFB,0xFC,0xFD,0xFE,0xFF,0x9F +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 037 UTF-EBCDIC byte; value is UTF8SKIP for start bytes; @@ -315,92 +339,100 @@ EXTCONST U8 PL_utf8skip[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 7, 1 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 14, 1 }; /* Index is EBCDIC 037 code point; value is its lowercase equivalent */ EXTCONST U8 PL_latin1_lc[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 66, 67, 68, 69, 70, 71, 72, 73, 106, 107, 108, 109, 110, 111, - 112, 81, 82, 83, 84, 85, 86, 87, 88, 121, 122, 123, 124, 125, 126, 127, - 112, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, - 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 156, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 140, 141, 142, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 129, 130, 131, 132, 133, 134, 135, 136, 137, 202, 203, 204, 205, 206, 207, - 208, 145, 146, 147, 148, 149, 150, 151, 152, 153, 218, 219, 220, 221, 222, 223, - 224, 225, 162, 163, 164, 165, 166, 167, 168, 169, 234, 203, 204, 205, 206, 207, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 219, 220, 221, 222, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x70,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, +0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9C,0x9F, +0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0x8C,0x8D,0x8E,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, +0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 037 code point; value is its uppercase equivalent. * The 'mod' in the name means that codepoints whose uppercase is above 255 or * longer than 1 character map to LATIN SMALL LETTER Y WITH DIARESIS */ EXTCONST U8 PL_mod_latin1_uc[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 98, 99, 100, 101, 102, 103, 104, 105, 74, 75, 76, 77, 78, 79, - 80, 113, 114, 115, 116, 117, 118, 119, 120, 223, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 128, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 172, 173, 174, 143, - 144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 158, 157, 158, 159, - 223, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 235, 236, 237, 238, 239, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 251, 252, 253, 254, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0xDF,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x80,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xAD,0xAE,0x8F, +0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9E,0x9F, +0xDF,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, +0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xFB,0xFC,0xFD,0xFE,0xDF, +0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 037 code point; For A-Z, value is a-z; for a-z, value * is A-Z; all other code points map to themselves */ EXTCONST U8 PL_fold[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 140, 141, 142, 143, - 144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 156, 157, 158, 159, - 160, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 129, 130, 131, 132, 133, 134, 135, 136, 137, 202, 203, 204, 205, 206, 207, - 208, 145, 146, 147, 148, 149, 150, 151, 152, 153, 218, 219, 220, 221, 222, 223, - 224, 225, 162, 163, 164, 165, 166, 167, 168, 169, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, +0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, +0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, +0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC 037 code point; value is its other fold-pair equivalent * (A => a; a => A, etc) in the 0-255 range. If no such equivalent, value is * the code point itself */ EXTCONST U8 PL_fold_latin1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 98, 99, 100, 101, 102, 103, 104, 105, 74, 75, 76, 77, 78, 79, - 80, 113, 114, 115, 116, 117, 118, 119, 120, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 66, 67, 68, 69, 70, 71, 72, 73, 106, 107, 108, 109, 110, 111, - 128, 81, 82, 83, 84, 85, 86, 87, 88, 121, 122, 123, 124, 125, 126, 127, - 112, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 172, 173, 174, 143, - 144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 158, 157, 156, 159, - 160, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 140, 141, 142, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 129, 130, 131, 132, 133, 134, 135, 136, 137, 202, 235, 236, 237, 238, 239, - 208, 145, 146, 147, 148, 149, 150, 151, 152, 153, 218, 251, 252, 253, 254, 223, - 224, 225, 162, 163, 164, 165, 166, 167, 168, 169, 234, 203, 204, 205, 206, 207, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 219, 220, 221, 222, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x80,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x70,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xAD,0xAE,0x8F, +0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9C,0x9F, +0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0x8C,0x8D,0x8E,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, +0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xFB,0xFC,0xFD,0xFE,0xDF, +0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; #endif /* EBCDIC 037 */ @@ -412,84 +444,92 @@ EXTCONST U8 PL_fold_latin1[] = { /* Index is ASCII platform code point; value is EBCDIC POSIX-BC equivalent */ EXTCONST U8 PL_a2e[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 21, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, - 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111, - 124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214, - 215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 187, 188, 189, 106, 109, - 74, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150, - 151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 251, 79, 253, 255, 7, - 32, 33, 34, 35, 36, 37, 6, 23, 40, 41, 42, 43, 44, 9, 10, 27, - 48, 49, 26, 51, 52, 53, 54, 8, 56, 57, 58, 59, 4, 20, 62, 95, - 65, 170, 176, 177, 159, 178, 208, 181, 121, 180, 154, 138, 186, 202, 175, 161, - 144, 143, 234, 250, 190, 160, 182, 179, 157, 218, 155, 139, 183, 184, 185, 171, - 100, 101, 98, 102, 99, 103, 158, 104, 116, 113, 114, 115, 120, 117, 118, 119, - 172, 105, 237, 238, 235, 239, 236, 191, 128, 224, 254, 221, 252, 173, 174, 89, - 68, 69, 66, 70, 67, 71, 156, 72, 84, 81, 82, 83, 88, 85, 86, 87, - 140, 73, 205, 206, 203, 207, 204, 225, 112, 192, 222, 219, 220, 141, 142, 223 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, +0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, +0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, +0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xBB,0xBC,0xBD,0x6A,0x6D, +0x4A,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, +0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xFB,0x4F,0xFD,0xFF,0x07, +0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, +0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0x5F, +0x41,0xAA,0xB0,0xB1,0x9F,0xB2,0xD0,0xB5,0x79,0xB4,0x9A,0x8A,0xBA,0xCA,0xAF,0xA1, +0x90,0x8F,0xEA,0xFA,0xBE,0xA0,0xB6,0xB3,0x9D,0xDA,0x9B,0x8B,0xB7,0xB8,0xB9,0xAB, +0x64,0x65,0x62,0x66,0x63,0x67,0x9E,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77, +0xAC,0x69,0xED,0xEE,0xEB,0xEF,0xEC,0xBF,0x80,0xE0,0xFE,0xDD,0xFC,0xAD,0xAE,0x59, +0x44,0x45,0x42,0x46,0x43,0x47,0x9C,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57, +0x8C,0x49,0xCD,0xCE,0xCB,0xCF,0xCC,0xE1,0x70,0xC0,0xDE,0xDB,0xDC,0x8D,0x8E,0xDF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC POSIX-BC code point; value is ASCII platform equivalent */ EXTCONST U8 PL_e2a[] = { - 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 157, 10, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, - 128, 129, 130, 131, 132, 133, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, - 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, - 32, 160, 226, 228, 224, 225, 227, 229, 231, 241, 96, 46, 60, 40, 43, 124, - 38, 233, 234, 235, 232, 237, 238, 239, 236, 223, 33, 36, 42, 41, 59, 159, - 45, 47, 194, 196, 192, 193, 195, 197, 199, 209, 94, 44, 37, 95, 62, 63, - 248, 201, 202, 203, 200, 205, 206, 207, 204, 168, 58, 35, 64, 39, 61, 34, - 216, 97, 98, 99, 100, 101, 102, 103, 104, 105, 171, 187, 240, 253, 254, 177, - 176, 106, 107, 108, 109, 110, 111, 112, 113, 114, 170, 186, 230, 184, 198, 164, - 181, 175, 115, 116, 117, 118, 119, 120, 121, 122, 161, 191, 208, 221, 222, 174, - 162, 163, 165, 183, 169, 167, 182, 188, 189, 190, 172, 91, 92, 93, 180, 215, - 249, 65, 66, 67, 68, 69, 70, 71, 72, 73, 173, 244, 246, 242, 243, 245, - 166, 74, 75, 76, 77, 78, 79, 80, 81, 82, 185, 251, 252, 219, 250, 255, - 217, 247, 83, 84, 85, 86, 87, 88, 89, 90, 178, 212, 214, 210, 211, 213, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 179, 123, 220, 125, 218, 126 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, +0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, +0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, +0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0x60,0x2E,0x3C,0x28,0x2B,0x7C, +0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x9F, +0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0x5E,0x2C,0x25,0x5F,0x3E,0x3F, +0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0xA8,0x3A,0x23,0x40,0x27,0x3D,0x22, +0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, +0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, +0xB5,0xAF,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0xDD,0xDE,0xAE, +0xA2,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xAC,0x5B,0x5C,0x5D,0xB4,0xD7, +0xF9,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, +0xA6,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xDB,0xFA,0xFF, +0xD9,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0x7B,0xDC,0x7D,0xDA,0x7E +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* (Confusingly named) Index is EBCDIC POSIX-BC I8 byte; value is * EBCDIC POSIX-BC UTF-EBCDIC equivalent */ EXTCONST U8 PL_utf2e[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 21, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, - 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111, - 124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214, - 215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 187, 188, 189, 106, 109, - 74, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150, - 151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 251, 79, 253, 255, 7, - 32, 33, 34, 35, 36, 37, 6, 23, 40, 41, 42, 43, 44, 9, 10, 27, - 48, 49, 26, 51, 52, 53, 54, 8, 56, 57, 58, 59, 4, 20, 62, 95, - 65, 66, 67, 68, 69, 70, 71, 72, 73, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 98, 99, 100, 101, 102, 103, 104, 105, 112, 113, 114, 115, 116, 117, - 118, 119, 120, 121, 128, 138, 139, 140, 141, 142, 143, 144, 154, 155, 156, 157, - 158, 159, 160, 161, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, - 182, 183, 184, 185, 186, 190, 191, 192, 202, 203, 204, 205, 206, 207, 208, 218, - 219, 220, 221, 222, 223, 224, 225, 234, 235, 236, 237, 238, 239, 250, 252, 254 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, +0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, +0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, +0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xBB,0xBC,0xBD,0x6A,0x6D, +0x4A,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, +0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xFB,0x4F,0xFD,0xFF,0x07, +0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, +0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0x5F, +0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x51,0x52,0x53,0x54,0x55,0x56,0x57, +0x58,0x59,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x70,0x71,0x72,0x73,0x74,0x75, +0x76,0x77,0x78,0x79,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B,0x9C,0x9D, +0x9E,0x9F,0xA0,0xA1,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5, +0xB6,0xB7,0xB8,0xB9,0xBA,0xBE,0xBF,0xC0,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xD0,0xDA, +0xDB,0xDC,0xDD,0xDE,0xDF,0xE0,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFC,0xFE +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* (Confusingly named) Index is EBCDIC POSIX-BC UTF-EBCDIC byte; value is * EBCDIC POSIX-BC I8 equivalent */ EXTCONST U8 PL_e2utf[] = { - 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 157, 10, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, - 128, 129, 130, 131, 132, 133, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, - 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, - 32, 160, 161, 162, 163, 164, 165, 166, 167, 168, 96, 46, 60, 40, 43, 124, - 38, 169, 170, 171, 172, 173, 174, 175, 176, 177, 33, 36, 42, 41, 59, 159, - 45, 47, 178, 179, 180, 181, 182, 183, 184, 185, 94, 44, 37, 95, 62, 63, - 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 58, 35, 64, 39, 61, 34, - 196, 97, 98, 99, 100, 101, 102, 103, 104, 105, 197, 198, 199, 200, 201, 202, - 203, 106, 107, 108, 109, 110, 111, 112, 113, 114, 204, 205, 206, 207, 208, 209, - 210, 211, 115, 116, 117, 118, 119, 120, 121, 122, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 91, 92, 93, 229, 230, - 231, 65, 66, 67, 68, 69, 70, 71, 72, 73, 232, 233, 234, 235, 236, 237, - 238, 74, 75, 76, 77, 78, 79, 80, 81, 82, 239, 240, 241, 242, 243, 244, - 245, 246, 83, 84, 85, 86, 87, 88, 89, 90, 247, 248, 249, 250, 251, 252, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 253, 123, 254, 125, 255, 126 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, +0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, +0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, +0x20,0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0x60,0x2E,0x3C,0x28,0x2B,0x7C, +0x26,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB0,0xB1,0x21,0x24,0x2A,0x29,0x3B,0x9F, +0x2D,0x2F,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0x5E,0x2C,0x25,0x5F,0x3E,0x3F, +0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,0xC0,0xC1,0xC2,0xC3,0x3A,0x23,0x40,0x27,0x3D,0x22, +0xC4,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA, +0xCB,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xCC,0xCD,0xCE,0xCF,0xD0,0xD1, +0xD2,0xD3,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9, +0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,0xE0,0xE1,0xE2,0xE3,0xE4,0x5B,0x5C,0x5D,0xE5,0xE6, +0xE7,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, +0xEE,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xEF,0xF0,0xF1,0xF2,0xF3,0xF4, +0xF5,0xF6,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xFD,0x7B,0xFE,0x7D,0xFF,0x7E +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC POSIX-BC UTF-EBCDIC byte; value is UTF8SKIP for start bytes; @@ -512,92 +552,100 @@ EXTCONST U8 PL_utf8skip[] = { 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 5, 5, 5, 5, 6, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 7, 1, 7, 1 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 7, 1, 14, 1 }; /* Index is EBCDIC POSIX-BC code point; value is its lowercase equivalent */ EXTCONST U8 PL_latin1_lc[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 66, 67, 68, 69, 70, 71, 72, 73, 106, 107, 108, 109, 110, 111, - 112, 81, 82, 83, 84, 85, 86, 87, 88, 121, 122, 123, 124, 125, 126, 127, - 112, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, - 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 156, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 140, 141, 142, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 129, 130, 131, 132, 133, 134, 135, 136, 137, 202, 203, 204, 205, 206, 207, - 208, 145, 146, 147, 148, 149, 150, 151, 152, 153, 218, 219, 220, 219, 222, 223, - 192, 225, 162, 163, 164, 165, 166, 167, 168, 169, 234, 203, 204, 205, 206, 207, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 220, 253, 222, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x70,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, +0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9C,0x9F, +0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0x8C,0x8D,0x8E,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDB,0xDE,0xDF, +0xC0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xDC,0xFD,0xDE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC POSIX-BC code point; value is its uppercase equivalent. * The 'mod' in the name means that codepoints whose uppercase is above 255 or * longer than 1 character map to LATIN SMALL LETTER Y WITH DIARESIS */ EXTCONST U8 PL_mod_latin1_uc[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 98, 99, 100, 101, 102, 103, 104, 105, 74, 75, 76, 77, 78, 79, - 80, 113, 114, 115, 116, 117, 118, 119, 120, 223, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 128, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 172, 173, 174, 143, - 144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 158, 157, 158, 159, - 223, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 224, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 235, 236, 237, 238, 239, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 221, 252, 221, 254, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0xDF,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x80,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xAD,0xAE,0x8F, +0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9E,0x9F, +0xDF,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xE0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, +0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDD,0xFC,0xDD,0xFE,0xDF, +0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC POSIX-BC code point; For A-Z, value is a-z; for a-z, value * is A-Z; all other code points map to themselves */ EXTCONST U8 PL_fold[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 140, 141, 142, 143, - 144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 156, 157, 158, 159, - 160, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 129, 130, 131, 132, 133, 134, 135, 136, 137, 202, 203, 204, 205, 206, 207, - 208, 145, 146, 147, 148, 149, 150, 151, 152, 153, 218, 219, 220, 221, 222, 223, - 224, 225, 162, 163, 164, 165, 166, 167, 168, 169, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, +0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, +0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, +0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; /* Index is EBCDIC POSIX-BC code point; value is its other fold-pair equivalent * (A => a; a => A, etc) in the 0-255 range. If no such equivalent, value is * the code point itself */ EXTCONST U8 PL_fold_latin1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 98, 99, 100, 101, 102, 103, 104, 105, 74, 75, 76, 77, 78, 79, - 80, 113, 114, 115, 116, 117, 118, 119, 120, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 66, 67, 68, 69, 70, 71, 72, 73, 106, 107, 108, 109, 110, 111, - 128, 81, 82, 83, 84, 85, 86, 87, 88, 121, 122, 123, 124, 125, 126, 127, - 112, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 172, 173, 174, 143, - 144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 158, 157, 156, 159, - 160, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 140, 141, 142, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 224, 129, 130, 131, 132, 133, 134, 135, 136, 137, 202, 235, 236, 237, 238, 239, - 208, 145, 146, 147, 148, 149, 150, 151, 152, 153, 218, 221, 252, 219, 254, 223, - 192, 225, 162, 163, 164, 165, 166, 167, 168, 169, 234, 203, 204, 205, 206, 207, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 220, 253, 222, 255 +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x80,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x70,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xAD,0xAE,0x8F, +0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9C,0x9F, +0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0x8C,0x8D,0x8E,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xE0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, +0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDD,0xFC,0xDB,0xFE,0xDF, +0xC0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xDC,0xFD,0xDE,0xFF +/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ }; #endif /* EBCDIC POSIX-BC */ diff --git a/embed.fnc b/embed.fnc index 9dd6bc3..dd764e1 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1170,7 +1170,7 @@ Ap |SV* |regclass_swash |NULLOK const regexp *prog \ |NULLOK SV **listsvp|NULLOK SV **altsvp #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C) AMpR |SV* |_new_invlist_C_array|NN const UV* const list -: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b +EXMp |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b #endif Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \ |NN char* strend|NN char* strbeg \ @@ -1279,7 +1279,7 @@ Apmb |void |save_freepv |NULLOK char* pv Ap |void |save_generic_svref|NN SV** sptr Ap |void |save_generic_pvref|NN char** str Ap |void |save_shared_pvref|NN char** str -Ap |void |save_gp |NN GV* gv|I32 empty +Adp |void |save_gp |NN GV* gv|I32 empty Ap |HV* |save_hash |NN GV* gv Ap |void |save_hints Amp |void |save_helem |NN HV *hv|NN SV *key|NN SV **sptr @@ -1591,8 +1591,21 @@ EXMp |void |_invlist_dump |NN PerlIO *file|I32 level \ #endif Ap |void |taint_env Ap |void |taint_proper |NULLOK const char* f|NN const char *const s -Apd |UV |to_utf8_case |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp \ - |NN SV **swashp|NN const char *normal|NULLOK const char *special +Apd |UV |to_utf8_case |NN const U8 *p \ + |NN U8* ustrp \ + |NULLOK STRLEN *lenp \ + |NN SV **swashp \ + |NN const char *normal| \ + NULLOK const char *special +#if defined(PERL_IN_UTF8_C) +s |UV |_to_utf8_case |const UV uv1 \ + |NN const U8 *p \ + |NN U8* ustrp \ + |NULLOK STRLEN *lenp \ + |NN SV **swashp \ + |NN const char *normal \ + |NULLOK const char *special +#endif Abmd |UV |to_utf8_lower |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp AMp |UV |_to_utf8_lower_flags |NN const U8 *p|NN U8* ustrp \ |NULLOK STRLEN *lenp|bool flags diff --git a/embed.h b/embed.h index b41833b..75015fe 100644 --- a/embed.h +++ b/embed.h @@ -1031,6 +1031,9 @@ # if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) #define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d) # endif +# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C) +#define _invlistEQ(a,b,c) Perl__invlistEQ(aTHX_ a,b,c) +# endif # if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) #define _load_PL_utf8_foldclosures() Perl__load_PL_utf8_foldclosures(aTHX) #define regprop(a,b,c,d,e) Perl_regprop(aTHX_ a,b,c,d,e) @@ -1756,6 +1759,7 @@ #define isa_lookup(a,b,c,d) S_isa_lookup(aTHX_ a,b,c,d) # endif # if defined(PERL_IN_UTF8_C) +#define _to_utf8_case(a,b,c,d,e,f,g) S__to_utf8_case(aTHX_ a,b,c,d,e,f,g) #define check_locale_boundary_crossing(a,b,c,d) S_check_locale_boundary_crossing(aTHX_ a,b,c,d) #define is_utf8_common(a,b,c,d) S_is_utf8_common(aTHX_ a,b,c,d) #define swash_scan_list_line(a,b,c,d,e,f,g) S_swash_scan_list_line(aTHX_ a,b,c,d,e,f,g) diff --git a/ext/DynaLoader/DynaLoader_pm.PL b/ext/DynaLoader/DynaLoader_pm.PL index 5166cc4..c2e860b 100644 --- a/ext/DynaLoader/DynaLoader_pm.PL +++ b/ext/DynaLoader/DynaLoader_pm.PL @@ -85,7 +85,7 @@ package DynaLoader; # Tim.Bunce@ig.co.uk, August 1994 BEGIN { - $VERSION = '1.36'; + $VERSION = '1.37'; } EOT diff --git a/ext/DynaLoader/dl_dllload.xs b/ext/DynaLoader/dl_dllload.xs index 84484d6..c63ffba 100644 --- a/ext/DynaLoader/dl_dllload.xs +++ b/ext/DynaLoader/dl_dllload.xs @@ -157,8 +157,9 @@ dl_find_symbol(libhandle, symbolname, ign_err=0) DLDEBUG(2, PerlIO_printf(Perl_debug_log, " symbolref = %lx\n", (unsigned long) retv)); ST(0) = sv_newmortal(); - if (retv == NULL) + if (retv == NULL) { if (!ign_err) SaveError(aTHX_ "%s", strerror(errno)); + } else sv_setiv( ST(0), PTR2IV(retv)); XSRETURN(1); diff --git a/ext/Hash-Util-FieldHash/FieldHash.xs b/ext/Hash-Util-FieldHash/FieldHash.xs index 46efec6..2fcb612 100644 --- a/ext/Hash-Util-FieldHash/FieldHash.xs +++ b/ext/Hash-Util-FieldHash/FieldHash.xs @@ -178,15 +178,6 @@ HUF_ask_trigger(pTHX_ SV *ob_id) { return NULL; } -/* get the trigger for an object, creating it if necessary */ -static SV * -HUF_get_trigger0(pTHX_ SV *obj, SV *ob_id) { - SV* trigger; - if (!(trigger = HUF_ask_trigger(aTHX_ ob_id))) - trigger = HUF_new_trigger(aTHX_ obj, ob_id); - return trigger; -} - static SV * HUF_get_trigger(pTHX_ SV *obj, SV *ob_id) { SV* trigger = HUF_ask_trigger(aTHX_ ob_id); diff --git a/ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm b/ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm index c2c4d4e..0d0b792 100644 --- a/ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm +++ b/ext/Hash-Util-FieldHash/lib/Hash/Util/FieldHash.pm @@ -5,7 +5,7 @@ use strict; use warnings; use Scalar::Util qw( reftype); -our $VERSION = '1.18'; +our $VERSION = '1.19'; require Exporter; our @ISA = qw(Exporter); diff --git a/ext/Opcode/Opcode.pm b/ext/Opcode/Opcode.pm index 78ffd54..1522c4c 100644 --- a/ext/Opcode/Opcode.pm +++ b/ext/Opcode/Opcode.pm @@ -6,7 +6,7 @@ use strict; our($VERSION, @ISA, @EXPORT_OK); -$VERSION = "1.33"; +$VERSION = "1.34"; use Carp; use Exporter (); diff --git a/ext/Opcode/Opcode.xs b/ext/Opcode/Opcode.xs index c1170e9..936ffba 100644 --- a/ext/Opcode/Opcode.xs +++ b/ext/Opcode/Opcode.xs @@ -13,7 +13,7 @@ typedef struct { HV * x_op_named_bits; /* cache shared for whole process */ SV * x_opset_all; /* mask with all bits set */ IV x_opset_len; /* length of opmasks in bytes */ -#if 0 +#ifdef OPCODE_DEBUG int x_opcode_debug; /* unused warn() emitting debugging code */ #endif } my_cxt_t; @@ -23,7 +23,7 @@ START_MY_CXT #define op_named_bits (MY_CXT.x_op_named_bits) #define opset_all (MY_CXT.x_opset_all) #define opset_len (MY_CXT.x_opset_len) -#if 0 +#ifdef OPCODE_DEBUG # define opcode_debug (MY_CXT.x_opcode_debug) #else /* no API to turn this on at runtime, so constant fold the code away */ @@ -227,7 +227,9 @@ static void opmask_addlocal(pTHX_ SV *opset, char *op_mask_buf) /* Localise PL_op_mask then opmask_add() */ { char *orig_op_mask = PL_op_mask; +#ifdef OPCODE_DEBUG dMY_CXT; +#endif SAVEVPTR(PL_op_mask); /* XXX casting to an ordinary function ptr from a member function ptr diff --git a/ext/POSIX/t/is.t b/ext/POSIX/t/is.t index 6eb64fd..1625e03 100644 --- a/ext/POSIX/t/is.t +++ b/ext/POSIX/t/is.t @@ -8,13 +8,15 @@ BEGIN { plan(skip_all => "\$^O eq '$^O'") if $^O eq 'VMS'; plan(skip_all => "POSIX is unavailable") unless $Config{extensions} =~ /\bPOSIX\b/; + unshift @INC, "../../t"; + require 'loc_tools.pl'; } use POSIX; # E.g. \t might or might not be isprint() depending on the locale, # so let's reset to the default. -setlocale(LC_ALL, 'C') if $Config{d_setlocale}; +setlocale(LC_ALL, 'C') if locales_enabled('LC_ALL'); $| = 1; diff --git a/ext/POSIX/t/posix.t b/ext/POSIX/t/posix.t index 0bafb8f..0e5f086 100644 --- a/ext/POSIX/t/posix.t +++ b/ext/POSIX/t/posix.t @@ -164,7 +164,7 @@ like( getcwd(), qr/$pat/, 'getcwd' ); SKIP: { skip("strtod() not present", 2) unless $Config{d_strtod}; - if ($Config{d_setlocale}) { + if (locales_enabled('LC_NUMERIC')) { $lc = &POSIX::setlocale(&POSIX::LC_NUMERIC); &POSIX::setlocale(&POSIX::LC_NUMERIC, 'C'); } @@ -174,13 +174,13 @@ SKIP: { cmp_ok(abs("3.14159" - $n), '<', 1e-6, 'strtod works'); is($x, 6, 'strtod works'); - &POSIX::setlocale(&POSIX::LC_NUMERIC, $lc) if $Config{d_setlocale}; + &POSIX::setlocale(&POSIX::LC_NUMERIC, $lc) if locales_enabled('LC_NUMERIC'); } SKIP: { skip("strtold() not present", 2) unless $Config{d_strtold}; - if ($Config{d_setlocale}) { + if (locales_enabled('LC_NUMERIC')) { $lc = &POSIX::setlocale(&POSIX::LC_NUMERIC); &POSIX::setlocale(&POSIX::LC_NUMERIC, 'C'); } @@ -190,7 +190,7 @@ SKIP: { cmp_ok(abs("2.718" - $n), '<', 1e-6, 'strtold works'); is($x, 4, 'strtold works'); - &POSIX::setlocale(&POSIX::LC_NUMERIC, $lc) if $Config{d_setlocale}; + &POSIX::setlocale(&POSIX::LC_NUMERIC, $lc) if locales_enabled('LC_NUMERIC'); } SKIP: { @@ -227,7 +227,7 @@ sub try_strftime { is($got, $expect, "validating mini_mktime() and strftime(): $expect"); } -if ($Config{d_setlocale}) { +if (locales_enabled('LC_TIME')) { $lc = &POSIX::setlocale(&POSIX::LC_TIME); &POSIX::setlocale(&POSIX::LC_TIME, 'C'); } @@ -266,7 +266,7 @@ try_strftime("Fri Mar 31 00:00:00 2000 091", 0,0,0, 31,2,100); try_strftime("Thu Dec 30 00:00:00 1999 364", 0,0,0, -1,0,100,0,10); } -&POSIX::setlocale(&POSIX::LC_TIME, $lc) if $Config{d_setlocale}; +&POSIX::setlocale(&POSIX::LC_TIME, $lc) if locales_enabled('LC_TIME'); { for my $test (0, 1) { diff --git a/ext/POSIX/t/time.t b/ext/POSIX/t/time.t index 472624f..6a906e0 100644 --- a/ext/POSIX/t/time.t +++ b/ext/POSIX/t/time.t @@ -1,5 +1,10 @@ #!perl -w +BEGIN { + unshift @INC, "../../t"; + require 'loc_tools.pl'; +} + use strict; use Config; @@ -48,7 +53,7 @@ is(asctime(POSIX::localtime(12345678)), ctime(12345678), # Careful! strftime() is locale sensitive. Let's take care of that my $orig_loc = 'C'; -if ( $Config{d_setlocale} ) { +if (locales_enabled('LC_TIME')) { $orig_loc = setlocale(LC_TIME) || die "Cannot get locale information: $!"; setlocale(LC_TIME, "C") || die "Cannot setlocale() to C: $!"; } @@ -72,7 +77,7 @@ is(ord strftime($ss, POSIX::localtime(time)), 223, 'Format string has correct character'); unlike($ss, qr/\w/, 'Still not internally UTF-8 encoded'); -if ( $Config{d_setlocale} ) { +if (locales_enabled('LC_TIME')) { setlocale(LC_TIME, $orig_loc) || die "Cannot setlocale() back to orig: $!"; } diff --git a/ext/POSIX/t/wrappers.t b/ext/POSIX/t/wrappers.t index e10a921..f09b925 100644 --- a/ext/POSIX/t/wrappers.t +++ b/ext/POSIX/t/wrappers.t @@ -23,7 +23,7 @@ my $temp_file = $temp_fh->filename; # exit, fork, waitpid, sleep in waitpid.t # errno in posix.t -if ($Config{d_setlocale} && defined &POSIX::LC_MESSAGES) { +if (locales_enabled('LC_MESSAGES')) { my $non_english_locale; local $! = 1; my $english_message = "$!"; # Should be C locale since not in scope of diff --git a/ext/PerlIO-mmap/mmap.pm b/ext/PerlIO-mmap/mmap.pm index 939b94f..0ed59d2 100644 --- a/ext/PerlIO-mmap/mmap.pm +++ b/ext/PerlIO-mmap/mmap.pm @@ -1,7 +1,7 @@ package PerlIO::mmap; use strict; use warnings; -our $VERSION = '0.015'; +our $VERSION = '0.016'; use XSLoader; XSLoader::load(__PACKAGE__, __PACKAGE__->VERSION); diff --git a/ext/PerlIO-mmap/mmap.xs b/ext/PerlIO-mmap/mmap.xs index ff554e1..b3f1c4f 100644 --- a/ext/PerlIO-mmap/mmap.xs +++ b/ext/PerlIO-mmap/mmap.xs @@ -249,22 +249,6 @@ PerlIOMmap_fill(pTHX_ PerlIO *f) return code; } -static IV -PerlIOMmap_close(pTHX_ PerlIO *f) -{ - PerlIOMmap * const m = PerlIOSelf(f, PerlIOMmap); - PerlIOBuf * const b = &m->base; - IV code = PerlIO_flush(f); - if (m->bbuf) { - b->buf = m->bbuf; - m->bbuf = NULL; - b->ptr = b->end = b->buf; - } - if (PerlIOBuf_close(aTHX_ f) != 0) - code = -1; - return code; -} - static PerlIO * PerlIOMmap_dup(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *param, int flags) { diff --git a/ext/SDBM_File/SDBM_File.pm b/ext/SDBM_File/SDBM_File.pm index 707a9f8..5df9085 100644 --- a/ext/SDBM_File/SDBM_File.pm +++ b/ext/SDBM_File/SDBM_File.pm @@ -7,7 +7,7 @@ require Tie::Hash; require XSLoader; our @ISA = qw(Tie::Hash); -our $VERSION = "1.13"; +our $VERSION = "1.14"; our @EXPORT_OK = qw(PAGFEXT DIRFEXT PAIRMAX); use Exporter "import"; diff --git a/ext/SDBM_File/SDBM_File.xs b/ext/SDBM_File/SDBM_File.xs index 434784a..0df2855 100644 --- a/ext/SDBM_File/SDBM_File.xs +++ b/ext/SDBM_File/SDBM_File.xs @@ -29,6 +29,8 @@ typedef datum datum_value ; MODULE = SDBM_File PACKAGE = SDBM_File PREFIX = sdbm_ +PROTOTYPES: DISABLE + SDBM_File sdbm_TIEHASH(dbtype, filename, flags, mode, pagname=NULL) char * dbtype diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index 14cb34e..0fe79e8 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -5,7 +5,7 @@ use strict; use warnings; use Carp; -our $VERSION = '0.76'; +our $VERSION = '0.77'; require XSLoader; diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index b48f274..ebdef68 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -1393,6 +1393,54 @@ test_utf8n_to_uvchr(s, len, flags) OUTPUT: RETVAL +AV * +test_valid_utf8_to_uvchr(s) + + SV *s + PREINIT: + STRLEN retlen; + UV ret; + STRLEN slen; + + CODE: + /* Call utf8n_to_uvchr() with the inputs. It always asks for the + * actual length to be returned + * + * Length to assume is; not checked, so could have buffer overflow + */ + RETVAL = newAV(); + sv_2mortal((SV*)RETVAL); + + ret + = valid_utf8_to_uvchr((U8*) SvPV(s, slen), &retlen); + + /* Returns the return value in [0]; in [1] */ + av_push(RETVAL, newSVuv(ret)); + av_push(RETVAL, newSVuv(retlen)); + + OUTPUT: + RETVAL + +SV * +test_uvchr_to_utf8_flags(uv, flags) + + SV *uv + SV *flags + PREINIT: + U8 dest[UTF8_MAXBYTES]; + U8 *ret; + + CODE: + /* Call uvchr_to_utf8_flags() with the inputs. */ + ret = uvchr_to_utf8_flags(dest, SvUV(uv), SvUV(flags)); + if (! ret) { + XSRETURN_UNDEF; + } + RETVAL = newSVpvn((char *) dest, ret - dest); + + OUTPUT: + RETVAL + MODULE = XS::APItest:Overload PACKAGE = XS::APItest::Overload void diff --git a/ext/XS-APItest/t/handy.t b/ext/XS-APItest/t/handy.t index eea81e8..359769a 100644 --- a/ext/XS-APItest/t/handy.t +++ b/ext/XS-APItest/t/handy.t @@ -1,7 +1,8 @@ #!perl -w BEGIN { - require 'loc_tools.pl'; # Contains find_utf8_ctype_locale() + require 'loc_tools.pl'; # Contains locales_enabled() and + # find_utf8_ctype_locale() } use strict; @@ -18,7 +19,7 @@ sub truth($) { # Converts values so is() works my $locale; my $utf8_locale; -if($Config{d_setlocale}) { +if(locales_enabled('LC_ALL')) { require POSIX; $locale = POSIX::setlocale( &POSIX::LC_ALL, "C"); if (defined $locale && $locale eq 'C') { diff --git a/ext/XS-APItest/t/locale.t b/ext/XS-APItest/t/locale.t index 42fdab8..be594b0 100644 --- a/ext/XS-APItest/t/locale.t +++ b/ext/XS-APItest/t/locale.t @@ -6,15 +6,10 @@ BEGIN { use XS::APItest; use Config; -BEGIN { - eval { require POSIX; POSIX->import("locale_h") }; - if ($@) { - skip_all("could not load the POSIX module"); # running minitest? - } -} +skip_all("locales not available") unless locales_enabled('LC_NUMERIC'); my @locales = eval { find_locales( &LC_NUMERIC ) }; -skip_all("no locales available") unless @locales; +skip_all("no LC_NUMERIC locales available") unless @locales; my $non_dot_locale; for (@locales) { diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index 2984075..9b5ed9b 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -2,11 +2,103 @@ use strict; use Test::More; +$|=1; + +no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit + # machines, and that is tested elsewhere use XS::APItest; my $pound_sign = chr utf8::unicode_to_native(163); +sub isASCII { ord "A" == 65 } + +sub display_bytes { + my $string = shift; + return '"' + . join("", map { sprintf("\\x%02x", ord $_) } split "", $string) + . '"'; +} + +# This test file can't use byte_utf8a_to_utf8n() from t/charset_tools.pl +# because that uses the same functions we are testing here. So UTF-EBCDIC +# strings are hard-coded as I8 strings in this file instead, and we use array +# lookup to translate into the appropriate code page. + +my @i8_to_native = ( # Only code page 1047 so far. +# _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F +0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, +0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, +0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, +0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAD,0xE0,0xBD,0x5F,0x6D, +0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, +0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, +0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, +0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, +0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x51,0x52,0x53,0x54,0x55,0x56, +0x57,0x58,0x59,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x70,0x71,0x72,0x73, +0x74,0x75,0x76,0x77,0x78,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B,0x9C, +0x9D,0x9E,0x9F,0xA0,0xAA,0xAB,0xAC,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6, +0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, +0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE, +); + +*I8_to_native = (isASCII) + ? sub { return shift } + : sub { return join "", map { chr $i8_to_native[ord $_] } + split "", shift }; + +my $is64bit = length sprintf("%x", ~0) > 8; + + +# Test utf8n_to_uvchr(). These provide essentially complete code coverage. +# Copied from utf8.h +my $UTF8_ALLOW_EMPTY = 0x0001; +my $UTF8_ALLOW_CONTINUATION = 0x0002; +my $UTF8_ALLOW_NON_CONTINUATION = 0x0004; +my $UTF8_ALLOW_SHORT = 0x0008; +my $UTF8_ALLOW_LONG = 0x0010; +my $UTF8_DISALLOW_SURROGATE = 0x0020; +my $UTF8_WARN_SURROGATE = 0x0040; +my $UTF8_DISALLOW_NONCHAR = 0x0080; +my $UTF8_WARN_NONCHAR = 0x0100; +my $UTF8_DISALLOW_SUPER = 0x0200; +my $UTF8_WARN_SUPER = 0x0400; +my $UTF8_DISALLOW_ABOVE_31_BIT = 0x0800; +my $UTF8_WARN_ABOVE_31_BIT = 0x1000; +my $UTF8_CHECK_ONLY = 0x2000; + +# Test uvchr_to_utf8(). +my $UNICODE_WARN_SURROGATE = 0x0001; +my $UNICODE_WARN_NONCHAR = 0x0002; +my $UNICODE_WARN_SUPER = 0x0004; +my $UNICODE_WARN_ABOVE_31_BIT = 0x0008; +my $UNICODE_DISALLOW_SURROGATE = 0x0010; +my $UNICODE_DISALLOW_NONCHAR = 0x0020; +my $UNICODE_DISALLOW_SUPER = 0x0040; +my $UNICODE_DISALLOW_ABOVE_31_BIT = 0x0080; + +my $look_for_everything_utf8n_to + = $UTF8_DISALLOW_SURROGATE + | $UTF8_WARN_SURROGATE + | $UTF8_DISALLOW_NONCHAR + | $UTF8_WARN_NONCHAR + | $UTF8_DISALLOW_SUPER + | $UTF8_WARN_SUPER + | $UTF8_DISALLOW_ABOVE_31_BIT + | $UTF8_WARN_ABOVE_31_BIT; +my $look_for_everything_uvchr_to + = $UNICODE_DISALLOW_SURROGATE + | $UNICODE_WARN_SURROGATE + | $UNICODE_DISALLOW_NONCHAR + | $UNICODE_WARN_NONCHAR + | $UNICODE_DISALLOW_SUPER + | $UNICODE_WARN_SUPER + | $UNICODE_DISALLOW_ABOVE_31_BIT + | $UNICODE_WARN_ABOVE_31_BIT; + foreach ([0, '', '', 'empty'], [0, 'N', 'N', '1 char'], [1, 'NN', 'N', '1 char substring'], @@ -26,68 +118,381 @@ foreach ([0, '', '', 'empty'], is(bytes_cmp_utf8($right, $left), -$expect, "$desc reversed"); } -my $isASCII = (ord("A") == 65); -if ($isASCII) { # EBCDIC is too hard to test for malformations +# The keys to this hash are Unicode code points, their values are the native +# UTF-8 representations of them. The code points are chosen because they are +# "interesting" on either or both ASCII and EBCDIC platforms. First we add +# boundaries where the number of bytes required to represent them increase, or +# are adjacent to problematic code points, so we want to make sure they aren't +# considered problematic. +my %code_points = ( + 0x0100 => (isASCII) ? "\xc4\x80" : I8_to_native("\xc8\xa0"), + 0x0400 - 1 => (isASCII) ? "\xcf\xbf" : I8_to_native("\xdf\xbf"), + 0x0400 => (isASCII) ? "\xd0\x80" : I8_to_native("\xe1\xa0\xa0"), + 0x0800 - 1 => (isASCII) ? "\xdf\xbf" : I8_to_native("\xe1\xbf\xbf"), + 0x0800 => (isASCII) ? "\xe0\xa0\x80" : I8_to_native("\xe2\xa0\xa0"), + 0x4000 - 1 => (isASCII) ? "\xe3\xbf\xbf" : I8_to_native("\xef\xbf\xbf"), + 0x4000 => (isASCII) ? "\xe4\x80\x80" : I8_to_native("\xf0\xb0\xa0\xa0"), + 0x8000 - 1 => (isASCII) ? "\xe7\xbf\xbf" : I8_to_native("\xf0\xbf\xbf\xbf"), -# Test uft8n_to_uvchr(). These provide essentially complete code coverage. + # First code point that the implementation of isUTF8_POSSIBLY_PROBLEMATIC, + # as of this writing, considers potentially problematic on EBCDIC + 0x8000 => (isASCII) ? "\xe8\x80\x80" : I8_to_native("\xf1\xa0\xa0\xa0"), -# Copied from utf8.h -my $UTF8_ALLOW_EMPTY = 0x0001; -my $UTF8_ALLOW_CONTINUATION = 0x0002; -my $UTF8_ALLOW_NON_CONTINUATION = 0x0004; -my $UTF8_ALLOW_SHORT = 0x0008; -my $UTF8_ALLOW_LONG = 0x0010; -my $UTF8_DISALLOW_SURROGATE = 0x0020; -my $UTF8_WARN_SURROGATE = 0x0040; -my $UTF8_DISALLOW_NONCHAR = 0x0080; -my $UTF8_WARN_NONCHAR = 0x0100; -my $UTF8_DISALLOW_SUPER = 0x0200; -my $UTF8_WARN_SUPER = 0x0400; -my $UTF8_DISALLOW_FE_FF = 0x0800; -my $UTF8_WARN_FE_FF = 0x1000; -my $UTF8_CHECK_ONLY = 0x2000; + 0xD000 - 1 => (isASCII) ? "\xec\xbf\xbf" : I8_to_native("\xf1\xb3\xbf\xbf"), -my $REPLACEMENT = 0xFFFD; + # First code point that the implementation of isUTF8_POSSIBLY_PROBLEMATIC, + # as of this writing, considers potentially problematic on ASCII + 0xD000 => (isASCII) ? "\xed\x80\x80" : I8_to_native("\xf1\xb4\xa0\xa0"), + + # Bracket the surrogates + 0xD7FF => (isASCII) ? "\xed\x9f\xbf" : I8_to_native("\xf1\xb5\xbf\xbf"), + 0xE000 => (isASCII) ? "\xee\x80\x80" : I8_to_native("\xf1\xb8\xa0\xa0"), + + # Bracket the 32 contiguous non characters + 0xFDCF => (isASCII) ? "\xef\xb7\x8f" : I8_to_native("\xf1\xbf\xae\xaf"), + 0xFDF0 => (isASCII) ? "\xef\xb7\xb0" : I8_to_native("\xf1\xbf\xaf\xb0"), + + # Mostly bracket non-characters, but some are transitions to longer + # strings + 0xFFFD => (isASCII) ? "\xef\xbf\xbd" : I8_to_native("\xf1\xbf\xbf\xbd"), + 0x10000 - 1 => (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"), + 0x10000 => (isASCII) ? "\xf0\x90\x80\x80" : I8_to_native("\xf2\xa0\xa0\xa0"), + 0x1FFFD => (isASCII) ? "\xf0\x9f\xbf\xbd" : I8_to_native("\xf3\xbf\xbf\xbd"), + 0x20000 => (isASCII) ? "\xf0\xa0\x80\x80" : I8_to_native("\xf4\xa0\xa0\xa0"), + 0x2FFFD => (isASCII) ? "\xf0\xaf\xbf\xbd" : I8_to_native("\xf5\xbf\xbf\xbd"), + 0x30000 => (isASCII) ? "\xf0\xb0\x80\x80" : I8_to_native("\xf6\xa0\xa0\xa0"), + 0x3FFFD => (isASCII) ? "\xf0\xbf\xbf\xbd" : I8_to_native("\xf7\xbf\xbf\xbd"), + 0x40000 - 1 => (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"), + 0x40000 => (isASCII) ? "\xf1\x80\x80\x80" : I8_to_native("\xf8\xa8\xa0\xa0\xa0"), + 0x4FFFD => (isASCII) ? "\xf1\x8f\xbf\xbd" : I8_to_native("\xf8\xa9\xbf\xbf\xbd"), + 0x50000 => (isASCII) ? "\xf1\x90\x80\x80" : I8_to_native("\xf8\xaa\xa0\xa0\xa0"), + 0x5FFFD => (isASCII) ? "\xf1\x9f\xbf\xbd" : I8_to_native("\xf8\xab\xbf\xbf\xbd"), + 0x60000 => (isASCII) ? "\xf1\xa0\x80\x80" : I8_to_native("\xf8\xac\xa0\xa0\xa0"), + 0x6FFFD => (isASCII) ? "\xf1\xaf\xbf\xbd" : I8_to_native("\xf8\xad\xbf\xbf\xbd"), + 0x70000 => (isASCII) ? "\xf1\xb0\x80\x80" : I8_to_native("\xf8\xae\xa0\xa0\xa0"), + 0x7FFFD => (isASCII) ? "\xf1\xbf\xbf\xbd" : I8_to_native("\xf8\xaf\xbf\xbf\xbd"), + 0x80000 => (isASCII) ? "\xf2\x80\x80\x80" : I8_to_native("\xf8\xb0\xa0\xa0\xa0"), + 0x8FFFD => (isASCII) ? "\xf2\x8f\xbf\xbd" : I8_to_native("\xf8\xb1\xbf\xbf\xbd"), + 0x90000 => (isASCII) ? "\xf2\x90\x80\x80" : I8_to_native("\xf8\xb2\xa0\xa0\xa0"), + 0x9FFFD => (isASCII) ? "\xf2\x9f\xbf\xbd" : I8_to_native("\xf8\xb3\xbf\xbf\xbd"), + 0xA0000 => (isASCII) ? "\xf2\xa0\x80\x80" : I8_to_native("\xf8\xb4\xa0\xa0\xa0"), + 0xAFFFD => (isASCII) ? "\xf2\xaf\xbf\xbd" : I8_to_native("\xf8\xb5\xbf\xbf\xbd"), + 0xB0000 => (isASCII) ? "\xf2\xb0\x80\x80" : I8_to_native("\xf8\xb6\xa0\xa0\xa0"), + 0xBFFFD => (isASCII) ? "\xf2\xbf\xbf\xbd" : I8_to_native("\xf8\xb7\xbf\xbf\xbd"), + 0xC0000 => (isASCII) ? "\xf3\x80\x80\x80" : I8_to_native("\xf8\xb8\xa0\xa0\xa0"), + 0xCFFFD => (isASCII) ? "\xf3\x8f\xbf\xbd" : I8_to_native("\xf8\xb9\xbf\xbf\xbd"), + 0xD0000 => (isASCII) ? "\xf3\x90\x80\x80" : I8_to_native("\xf8\xba\xa0\xa0\xa0"), + 0xDFFFD => (isASCII) ? "\xf3\x9f\xbf\xbd" : I8_to_native("\xf8\xbb\xbf\xbf\xbd"), + 0xE0000 => (isASCII) ? "\xf3\xa0\x80\x80" : I8_to_native("\xf8\xbc\xa0\xa0\xa0"), + 0xEFFFD => (isASCII) ? "\xf3\xaf\xbf\xbd" : I8_to_native("\xf8\xbd\xbf\xbf\xbd"), + 0xF0000 => (isASCII) ? "\xf3\xb0\x80\x80" : I8_to_native("\xf8\xbe\xa0\xa0\xa0"), + 0xFFFFD => (isASCII) ? "\xf3\xbf\xbf\xbd" : I8_to_native("\xf8\xbf\xbf\xbf\xbd"), + 0x100000 => (isASCII) ? "\xf4\x80\x80\x80" : I8_to_native("\xf9\xa0\xa0\xa0\xa0"), + 0x10FFFD => (isASCII) ? "\xf4\x8f\xbf\xbd" : I8_to_native("\xf9\xa1\xbf\xbf\xbd"), + 0x110000 => (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), + + # Things that would be noncharacters if they were in Unicode, and might be + # mistaken, if the C code is bad, to be nonchars + 0x11FFFE => (isASCII) ? "\xf4\x9f\xbf\xbe" : I8_to_native("\xf9\xa3\xbf\xbf\xbe"), + 0x11FFFF => (isASCII) ? "\xf4\x9f\xbf\xbf" : I8_to_native("\xf9\xa3\xbf\xbf\xbf"), + 0x20FFFE => (isASCII) ? "\xf8\x88\x8f\xbf\xbe" : I8_to_native("\xfa\xa1\xbf\xbf\xbe"), + 0x20FFFF => (isASCII) ? "\xf8\x88\x8f\xbf\xbf" : I8_to_native("\xfa\xa1\xbf\xbf\xbf"), + + 0x200000 - 1 => (isASCII) ? "\xf7\xbf\xbf\xbf" : I8_to_native("\xf9\xbf\xbf\xbf\xbf"), + 0x200000 => (isASCII) ? "\xf8\x88\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"), + 0x400000 - 1 => (isASCII) ? "\xf8\x8f\xbf\xbf\xbf" : I8_to_native("\xfb\xbf\xbf\xbf\xbf"), + 0x400000 => (isASCII) ? "\xf8\x90\x80\x80\x80" : I8_to_native("\xfc\xa4\xa0\xa0\xa0\xa0"), + 0x4000000 - 1 => (isASCII) ? "\xfb\xbf\xbf\xbf\xbf" : I8_to_native("\xfd\xbf\xbf\xbf\xbf\xbf"), + 0x4000000 => (isASCII) ? "\xfc\x84\x80\x80\x80\x80" : I8_to_native("\xfe\xa2\xa0\xa0\xa0\xa0\xa0"), + 0x4000000 - 1 => (isASCII) ? "\xfb\xbf\xbf\xbf\xbf" : I8_to_native("\xfd\xbf\xbf\xbf\xbf\xbf"), + 0x4000000 => (isASCII) ? "\xfc\x84\x80\x80\x80\x80" : I8_to_native("\xfe\xa2\xa0\xa0\xa0\xa0\xa0"), + 0x40000000 - 1 => (isASCII) ? "\xfc\xbf\xbf\xbf\xbf\xbf" : I8_to_native("\xfe\xbf\xbf\xbf\xbf\xbf\xbf"), + 0x40000000 => (isASCII) ? "\xfd\x80\x80\x80\x80\x80" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0"), + 0x80000000 - 1 => (isASCII) ? "\xfd\xbf\xbf\xbf\xbf\xbf" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa1\xbf\xbf\xbf\xbf\xbf\xbf"), + 0x80000000 => (isASCII) ? "\xfe\x82\x80\x80\x80\x80\x80" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), + 0xFFFFFFFF => (isASCII) ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), +); + +if ($is64bit) { + no warnings qw(overflow portable); + $code_points{0x100000000} = (isASCII) ? "\xfe\x84\x80\x80\x80\x80\x80" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"); + $code_points{0x1000000000 - 1} = (isASCII) ? "\xfe\xbf\xbf\xbf\xbf\xbf\xbf" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa1\xbf\xbf\xbf\xbf\xbf\xbf\xbf"); + $code_points{0x1000000000} = (isASCII) ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"); + $code_points{0xFFFFFFFFFFFFFFFF} = (isASCII) ? "\xff\x80\x8f\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" : I8_to_native("\xff\xaf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf"); +} + +# Now add in entries for each of code points 0-255, which require special +# handling on EBCDIC. Remember the keys are Unicode values, and the values +# are the native UTF-8. For invariants, the bytes are just the native chr. + +my $cp = 0; +while ($cp < ((isASCII) ? 128 : 160)) { # This is from the definition of + # invariant + $code_points{$cp} = chr utf8::unicode_to_native($cp); + $cp++; +} + +# Done with the invariants. Now do the variants. All in this range are 2 +# byte. Again, we can't use the internal functions to generate UTF-8, as +# those are what we are trying to test. In the loop, we know what range the +# continuation bytes can be in, and what the lowest start byte can be. So we +# cycle through them. + +my $first_continuation = (isASCII) ? 0x80 : 0xA0; +my $final_continuation = 0xBF; +my $start = (isASCII) ? 0xC2 : 0xC5; + +my $continuation = $first_continuation - 1; + +while ($cp < 255) { + if (++$continuation > $final_continuation) { + + # Wrap to the next start byte when we reach the final continuation + # byte possible + $continuation = $first_continuation; + $start++; + } + $code_points{$cp} = I8_to_native(chr($start) . chr($continuation)); + + $cp++; +} my @warnings; use warnings 'utf8'; local $SIG{__WARN__} = sub { push @warnings, @_ }; -# First test the malformations. All these raise category utf8 warnings. -foreach my $test ( +# This set of tests looks for basic sanity, and lastly tests the bottom level +# decode routine for the given code point. If the earlier tests for that code +# point fail, that one probably will too. Malformations are tested in later +# segments of code. +for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } + keys %code_points) +{ + my $hex_u = sprintf("0x%02X", $u); + my $n = utf8::unicode_to_native($u); + my $hex_n = sprintf("0x%02X", $n); + my $bytes = $code_points{$u}; + + my $offskip_should_be; + { + no warnings qw(overflow portable); + $offskip_should_be = (isASCII) + ? ( $u < 0x80 ? 1 : + $u < 0x800 ? 2 : + $u < 0x10000 ? 3 : + $u < 0x200000 ? 4 : + $u < 0x4000000 ? 5 : + $u < 0x80000000 ? 6 : (($is64bit) + ? ($u < 0x1000000000 ? 7 : 13) + : 7) + ) + : ($u < 0xA0 ? 1 : + $u < 0x400 ? 2 : + $u < 0x4000 ? 3 : + $u < 0x40000 ? 4 : + $u < 0x400000 ? 5 : + $u < 0x4000000 ? 6 : + $u < 0x40000000 ? 7 : 14 ); + } + + # If this test fails, subsequent ones are meaningless. + next unless is(test_OFFUNISKIP($u), $offskip_should_be, + "Verify OFFUNISKIP($hex_u) is $offskip_should_be"); + my $invariant = $offskip_should_be == 1; + my $display_invariant = $invariant || 0; + is(test_OFFUNI_IS_INVARIANT($u), $invariant, + "Verify OFFUNI_IS_INVARIANT($hex_u) is $display_invariant"); + + my $uvchr_skip_should_be = $offskip_should_be; + next unless is(test_UVCHR_SKIP($n), $uvchr_skip_should_be, + "Verify UVCHR_SKIP($hex_n) is $uvchr_skip_should_be"); + is(test_UVCHR_IS_INVARIANT($n), $offskip_should_be == 1, + "Verify UVCHR_IS_INVARIANT($hex_n) is $display_invariant"); + + my $n_chr = chr $n; + utf8::upgrade $n_chr; + + is(test_UTF8_SKIP($n_chr), $uvchr_skip_should_be, + "Verify UTF8_SKIP(chr $hex_n) is $uvchr_skip_should_be"); + + use bytes; + for (my $j = 0; $j < length $n_chr; $j++) { + my $b = substr($n_chr, $j, 1); + my $hex_b = sprintf("\"\\x%02x\"", ord $b); + + my $byte_invariant = $j == 0 && $uvchr_skip_should_be == 1; + my $display_byte_invariant = $byte_invariant || 0; + next unless is(test_UTF8_IS_INVARIANT($b), $byte_invariant, + " Verify UTF8_IS_INVARIANT($hex_b) for byte $j " + . "is $display_byte_invariant"); + + my $is_start = $j == 0 && $uvchr_skip_should_be > 1; + my $display_is_start = $is_start || 0; + next unless is(test_UTF8_IS_START($b), $is_start, + " Verify UTF8_IS_START($hex_b) is $display_is_start"); + + my $is_continuation = $j != 0 && $uvchr_skip_should_be > 1; + my $display_is_continuation = $is_continuation || 0; + next unless is(test_UTF8_IS_CONTINUATION($b), $is_continuation, + " Verify UTF8_IS_CONTINUATION($hex_b) is " + . "$display_is_continuation"); + + my $is_continued = $uvchr_skip_should_be > 1; + my $display_is_continued = $is_continued || 0; + next unless is(test_UTF8_IS_CONTINUED($b), $is_continued, + " Verify UTF8_IS_CONTINUED($hex_b) is " + . "$display_is_continued"); + + my $is_downgradeable_start = $n < 256 + && $uvchr_skip_should_be > 1 + && $j == 0; + my $display_is_downgradeable_start = $is_downgradeable_start || 0; + next unless is(test_UTF8_IS_DOWNGRADEABLE_START($b), + $is_downgradeable_start, + " Verify UTF8_IS_DOWNGRADEABLE_START($hex_b) is " + . "$display_is_downgradeable_start"); + + my $is_above_latin1 = $n > 255 && $j == 0; + my $display_is_above_latin1 = $is_above_latin1 || 0; + next unless is(test_UTF8_IS_ABOVE_LATIN1($b), + $is_above_latin1, + " Verify UTF8_IS_ABOVE_LATIN1($hex_b) is " + . "$display_is_above_latin1"); + + my $is_possibly_problematic = $j == 0 + && $n >= ((isASCII) + ? 0xD000 + : 0x8000); + my $display_is_possibly_problematic = $is_possibly_problematic || 0; + next unless is(test_isUTF8_POSSIBLY_PROBLEMATIC($b), + $is_possibly_problematic, + " Verify isUTF8_POSSIBLY_PROBLEMATIC($hex_b) is " + . "$display_is_above_latin1"); + } + + # We are not trying to look for warnings, etc, so if they should occur, it + # is an error. But some of the code points here do cause warnings, so we + # check here and turn off the ones that apply to such code points. A + # later section of the code tests for these kinds of things. + my $this_utf8_flags = $look_for_everything_utf8n_to; + my $len = length $bytes; + if ($n > 2 ** 31 - 1) { + $this_utf8_flags &= + ~($UTF8_DISALLOW_ABOVE_31_BIT|$UTF8_WARN_ABOVE_31_BIT); + } + if ($n > 0x10FFFF) { + $this_utf8_flags &= ~($UTF8_DISALLOW_SUPER|$UTF8_WARN_SUPER); + } + elsif (($n & 0xFFFE) == 0xFFFE) { + $this_utf8_flags &= ~($UTF8_DISALLOW_NONCHAR|$UTF8_WARN_NONCHAR); + } + + undef @warnings; + + my $display_flags = sprintf "0x%x", $this_utf8_flags; + my $ret_ref = test_utf8n_to_uvchr($bytes, $len, $this_utf8_flags); + my $display_bytes = display_bytes($bytes); + is($ret_ref->[0], $n, "Verify utf8n_to_uvchr($display_bytes, $display_flags) returns $hex_n"); + is($ret_ref->[1], $len, "Verify utf8n_to_uvchr() for $hex_n returns expected length"); + + unless (is(scalar @warnings, 0, + "Verify utf8n_to_uvchr() for $hex_n generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } + + undef @warnings; + + $ret_ref = test_valid_utf8_to_uvchr($bytes); + is($ret_ref->[0], $n, "Verify valid_utf8_to_uvchr($display_bytes) returns $hex_n"); + is($ret_ref->[1], $len, "Verify valid_utf8_to_uvchr() for $hex_n returns expected length"); + + unless (is(scalar @warnings, 0, + "Verify valid_utf8_to_uvchr() for $hex_n generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } + + # Similarly for uvchr_to_utf8 + my $this_uvchr_flags = $look_for_everything_uvchr_to; + if ($n > 2 ** 31 - 1) { + $this_uvchr_flags &= + ~($UNICODE_DISALLOW_ABOVE_31_BIT|$UNICODE_WARN_ABOVE_31_BIT); + } + if ($n > 0x10FFFF) { + $this_uvchr_flags &= ~($UNICODE_DISALLOW_SUPER|$UNICODE_WARN_SUPER); + } + elsif (($n & 0xFFFE) == 0xFFFE) { + $this_uvchr_flags &= ~($UNICODE_DISALLOW_NONCHAR|$UNICODE_WARN_NONCHAR); + } + $display_flags = sprintf "0x%x", $this_uvchr_flags; + + undef @warnings; + + my $ret = test_uvchr_to_utf8_flags($n, $this_uvchr_flags); + ok(defined $ret, "Verify uvchr_to_utf8_flags($hex_n, $display_flags) returned success"); + is($ret, $bytes, "Verify uvchr_to_utf8_flags($hex_n, $display_flags) returns correct bytes"); + + unless (is(scalar @warnings, 0, + "Verify uvchr_to_utf8_flags($hex_n, $display_flags) for $hex_n generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } +} + +my $REPLACEMENT = 0xFFFD; + +# Now test the malformations. All these raise category utf8 warnings. +my $c = (isASCII) ? "\x80" : "\xa0"; # A continuation byte +my @malformations = ( [ "zero length string malformation", "", 0, $UTF8_ALLOW_EMPTY, 0, 0, qr/empty string/ ], - [ "orphan continuation byte malformation", "\x80a", 2, + [ "orphan continuation byte malformation", I8_to_native("${c}a"), + 2, $UTF8_ALLOW_CONTINUATION, $REPLACEMENT, 1, qr/unexpected continuation byte/ ], - [ "premature next character malformation (immediate)", "\xc2a", 2, + [ "premature next character malformation (immediate)", + (isASCII) ? "\xc2a" : I8_to_native("\xc5") ."a", + 2, $UTF8_ALLOW_NON_CONTINUATION, $REPLACEMENT, 1, qr/unexpected non-continuation byte.*immediately after start byte/ ], - [ "premature next character malformation (non-immediate)", "\xf0\x80a", 3, + [ "premature next character malformation (non-immediate)", + I8_to_native("\xf0${c}a"), + 3, $UTF8_ALLOW_NON_CONTINUATION, $REPLACEMENT, 2, qr/unexpected non-continuation byte .* 2 bytes after start byte/ ], - [ "too short malformation", "\xf0\x80a", 2, + [ "too short malformation", I8_to_native("\xf0${c}a"), 2, # Having the 'a' after this, but saying there are only 2 bytes also # tests that we pay attention to the passed in length $UTF8_ALLOW_SHORT, $REPLACEMENT, 2, qr/2 bytes, need 4/ ], - [ "overlong malformation", "\xc1\xaf", 2, - $UTF8_ALLOW_LONG, ord('o'), 2, + [ "overlong malformation", I8_to_native("\xc0$c"), 2, + $UTF8_ALLOW_LONG, + 0, # NUL + 2, qr/2 bytes, need 1/ ], - [ "overflow malformation", "\xff\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf", 13, + [ "overflow malformation", + # These are the smallest overflowing on 64 byte machines: + # 2**64 + (isASCII) ? "\xff\x80\x90\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0" + : I8_to_native("\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + (isASCII) ? 13 : 14, 0, # There is no way to allow this malformation - $REPLACEMENT, 13, + $REPLACEMENT, + (isASCII) ? 13 : 14, qr/overflow/ ], -) { +); + +foreach my $test (@malformations) { my ($testname, $bytes, $length, $allow_flags, $allowed_uv, $expected_len, $message ) = @$test; next if ! ok(length($bytes) >= $length, "$testname: Make sure won't read beyond buffer: " . length($bytes) . " >= $length"); @@ -102,7 +507,7 @@ foreach my $test ( } else { if (scalar @warnings) { - note "The warnings were: " . join(", ", @warnings); + diag "The warnings were: " . join(", ", @warnings); } } @@ -113,7 +518,7 @@ foreach my $test ( is($ret_ref->[0], 0, "$testname: disallowed: no warnings 'utf8': Returns 0"); is($ret_ref->[1], $expected_len, "$testname: disallowed: no warnings 'utf8': Returns expected length"); if (!is(scalar @warnings, 0, "$testname: disallowed: no warnings 'utf8': no warnings generated")) { - note "The warnings were: " . join(", ", @warnings); + diag "The warnings were: " . join(", ", @warnings); } } @@ -123,7 +528,7 @@ foreach my $test ( is($ret_ref->[0], 0, "$testname: CHECK_ONLY: Returns 0"); is($ret_ref->[1], -1, "$testname: CHECK_ONLY: returns expected length"); if (! is(scalar @warnings, 0, "$testname: CHECK_ONLY: no warnings generated")) { - note "The warnings were: " . join(", ", @warnings); + diag "The warnings were: " . join(", ", @warnings); } next if $allow_flags == 0; # Skip if can't allow this malformation @@ -135,67 +540,348 @@ foreach my $test ( is($ret_ref->[1], $expected_len, "$testname: allowed: Returns expected length"); if (!is(scalar @warnings, 0, "$testname: allowed: no warnings generated")) { - note "The warnings were: " . join(", ", @warnings); + diag "The warnings were: " . join(", ", @warnings); } } -my $FF_ret; - -use Unicode::UCD; -my $has_quad = ($Unicode::UCD::MAX_CP > 0xFFFF_FFFF); -if ($has_quad) { - no warnings qw{portable overflow}; - $FF_ret = 0x1000000000; -} -else { # The above overflows unless a quad platform - $FF_ret = 0; -} - # Now test the cases where a legal code point is generated, but may or may not # be allowed/warned on. my @tests = ( - [ "surrogate", "\xed\xa4\x8d", - $UTF8_WARN_SURROGATE, $UTF8_DISALLOW_SURROGATE, 'surrogate', 0xD90D, 3, + [ "lowest surrogate", + (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"), + $UTF8_WARN_SURROGATE, $UTF8_DISALLOW_SURROGATE, + 'surrogate', 0xD800, + (isASCII) ? 3 : 4, qr/surrogate/ ], - [ "non_unicode", "\xf4\x90\x80\x80", - $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, 'non_unicode', 0x110000, 4, - qr/not Unicode/ + [ "a middle surrogate", + (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"), + $UTF8_WARN_SURROGATE, $UTF8_DISALLOW_SURROGATE, + 'surrogate', 0xD90D, + (isASCII) ? 3 : 4, + qr/surrogate/ + ], + [ "highest surrogate", + (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"), + $UTF8_WARN_SURROGATE, $UTF8_DISALLOW_SURROGATE, + 'surrogate', 0xDFFF, + (isASCII) ? 3 : 4, + qr/surrogate/ + ], + [ "first non_unicode", + (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), + $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, + 'non_unicode', 0x110000, + (isASCII) ? 4 : 5, + qr/not Unicode.* may not be portable/ ], - [ "non-character code point", "\xEF\xB7\x90", - $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, 'nonchar', 0xFDD0, 3, + [ "first of 32 consecutive non-character code points", + (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xFDD0, + (isASCII) ? 3 : 4, qr/Unicode non-character.*is not recommended for open interchange/ ], - [ "begins with FE", "\xfe\x82\x80\x80\x80\x80\x80", - + [ "a mid non-character code point of the 32 consecutive ones", + (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xFDE0, + (isASCII) ? 3 : 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "final of 32 consecutive non-character code points", + (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xFDEF, + (isASCII) ? 3 : 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+FFFE", + (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xFFFE, + (isASCII) ? 3 : 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+FFFF", + (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xFFFF, + (isASCII) ? 3 : 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+1FFFE", + (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x1FFFE, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+1FFFF", + (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x1FFFF, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+2FFFE", + (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x2FFFE, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+2FFFF", + (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x2FFFF, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+3FFFE", + (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x3FFFE, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+3FFFF", + (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x3FFFF, 4, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+4FFFE", + (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x4FFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+4FFFF", + (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x4FFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+5FFFE", + (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x5FFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+5FFFF", + (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x5FFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+6FFFE", + (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x6FFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+6FFFF", + (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x6FFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+7FFFE", + (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x7FFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+7FFFF", + (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x7FFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+8FFFE", + (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x8FFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+8FFFF", + (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x8FFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+9FFFE", + (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x9FFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+9FFFF", + (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x9FFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+AFFFE", + (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xAFFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+AFFFF", + (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xAFFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+BFFFE", + (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xBFFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+BFFFF", + (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xBFFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+CFFFE", + (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xCFFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+CFFFF", + (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xCFFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+DFFFE", + (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xDFFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+DFFFF", + (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xDFFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+EFFFE", + (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xEFFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+EFFFF", + (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xEFFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+FFFFE", + (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xFFFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+FFFFF", + (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0xFFFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+10FFFE", + (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x10FFFE, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "non-character code point U+10FFFF", + (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"), + $UTF8_WARN_NONCHAR, $UTF8_DISALLOW_NONCHAR, + 'nonchar', 0x10FFFF, + (isASCII) ? 4 : 5, + qr/Unicode non-character.*is not recommended for open interchange/ + ], + [ "requires at least 32 bits", + (isASCII) + ? "\xfe\x82\x80\x80\x80\x80\x80" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), # This code point is chosen so that it is representable in a UV on # 32-bit machines - $UTF8_WARN_FE_FF, $UTF8_DISALLOW_FE_FF, 'utf8', 0x80000000, 7, + $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, + 'utf8', 0x80000000, (isASCII) ? 7 :14, + qr/Code point 0x80000000 is not Unicode, and not portable/ + ], + [ "requires at least 32 bits, and use SUPER-type flags, instead of ABOVE_31_BIT", + (isASCII) + ? "\xfe\x82\x80\x80\x80\x80\x80" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), + $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, + 'utf8', 0x80000000, (isASCII) ? 7 :14, qr/Code point 0x80000000 is not Unicode, and not portable/ ], - [ "overflow with FE/FF", - # This tests the interaction of WARN_FE_FF/DISALLOW_FE_FF with - # overflow. The overflow malformation is never allowed, so preventing - # it takes precedence if the FE_FF options would otherwise allow in an - # overflowing value. These two code points (1 for 32-bits; 1 for 64) - # were chosen because the old overflow detection algorithm did not - # catch them; this means this test also checks for that fix. - ($has_quad) - ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" - : "\xfe\x86\x80\x80\x80\x80\x80", - - # We include both warning categories to make sure the FE_FF one has - # precedence - "$UTF8_WARN_FE_FF|$UTF8_WARN_SUPER", "$UTF8_DISALLOW_FE_FF", 'utf8', 0, - ($has_quad) ? 13 : 7, + [ "overflow with warnings/disallow for more than 31 bits", + # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT + # with overflow. The overflow malformation is never allowed, so + # preventing it takes precedence if the ABOVE_31_BIT options would + # otherwise allow in an overflowing value. The ASCII code points (1 + # for 32-bits; 1 for 64) were chosen because the old overflow + # detection algorithm did not catch them; this means this test also + # checks for that fix. The EBCDIC are arbitrary overflowing ones + # since we have no reports of failures with it. + (($is64bit) + ? ((isASCII) + ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" + : I8_to_native("\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) + : ((isASCII) + ? "\xfe\x86\x80\x80\x80\x80\x80" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), + + # We include both warning categories to make sure the ABOVE_31_BIT one + # has precedence + "$UTF8_WARN_ABOVE_31_BIT|$UTF8_WARN_SUPER", + "$UTF8_DISALLOW_ABOVE_31_BIT", + 'utf8', 0, + (! isASCII) ? 14 : ($is64bit) ? 13 : 7, qr/overflow at byte .*, after start byte 0xf/ ], ); -if ($has_quad) { # All FF's will overflow on 32 bit +if ($is64bit) { + no warnings qw{portable overflow}; push @tests, - [ "begins with FF", "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80", - $UTF8_WARN_FE_FF, $UTF8_DISALLOW_FE_FF, 'utf8', $FF_ret, 13, + [ "More than 32 bits", + (isASCII) + ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, + 'utf8', 0x1000000000, (isASCII) ? 13 : 14, qr/Code point 0x.* is not Unicode, and not portable/ ]; } @@ -218,18 +904,18 @@ foreach my $test (@tests) { my $eval_warn = $do_warning ? "use warnings '$warning'" : $warning eq "utf8" - ? "no warnings 'utf8'" - : "use warnings 'utf8'; no warnings '$warning'"; + ? "no warnings 'utf8'" + : "use warnings 'utf8'; no warnings '$warning'"; # is effectively disallowed if will overflow, even if the # flag indicates it is allowed, fix up test name to # indicate this as well my $disallowed = $disallow_flag || $will_overflow; - my $this_name = "$testname: " . (($disallow_flag) + my $this_name = "utf8n_to_uvchr() $testname: " . (($disallow_flag) ? 'disallowed' : ($disallowed) - ? 'FE_FF allowed' + ? 'ABOVE_31_BIT allowed' : 'allowed'); $this_name .= ", $eval_warn"; $this_name .= ", " . (($warn_flag) @@ -238,22 +924,32 @@ foreach my $test (@tests) { undef @warnings; my $ret_ref; - #note __LINE__ . ": $eval_warn; \$ret_ref = test_utf8n_to_uvchr('$bytes', $length, $warn_flag|$disallow_flag)"; - my $eval_text = "$eval_warn; \$ret_ref = test_utf8n_to_uvchr('$bytes', $length, $warn_flag|$disallow_flag)"; + my $display_bytes = display_bytes($bytes); + my $call = "Call was: $eval_warn; \$ret_ref = test_utf8n_to_uvchr('$display_bytes', $length, $warn_flag|$disallow_flag)"; + my $eval_text = "$eval_warn; \$ret_ref = test_utf8n_to_uvchr('$bytes', $length, $warn_flag|$disallow_flag)"; eval "$eval_text"; if (! ok ("$@ eq ''", "$this_name: eval succeeded")) { - note "\$!='$!'; eval'd=\"$eval_text\""; + diag "\$!='$!'; eval'd=\"$call\""; next; } if ($disallowed) { - is($ret_ref->[0], 0, "$this_name: Returns 0"); + unless (is($ret_ref->[0], 0, "$this_name: Returns 0")) + { + diag $call; + } } else { - is($ret_ref->[0], $allowed_uv, - "$this_name: Returns expected uv"); + unless (is($ret_ref->[0], $allowed_uv, + "$this_name: Returns expected uv")) + { + diag $call; + } + } + unless (is($ret_ref->[1], $expected_len, + "$this_name: Returns expected length")) + { + diag $call; } - is($ret_ref->[1], $expected_len, - "$this_name: Returns expected length"); if (! $do_warning && ($warning eq 'utf8' || $warning eq $category)) @@ -261,7 +957,8 @@ foreach my $test (@tests) { if (!is(scalar @warnings, 0, "$this_name: No warnings generated")) { - note "The warnings were: " . join(", ", @warnings); + diag $call; + diag "The warnings were: " . join(", ", @warnings); } } elsif ($will_overflow @@ -276,12 +973,16 @@ foreach my $test (@tests) { if (is(scalar @warnings, 1, "$this_name: Got a single warning ")) { - like($warnings[0], qr/overflow/, - "$this_name: Got overflow warning"); + unless (like($warnings[0], qr/overflow/, + "$this_name: Got overflow warning")) + { + diag $call; + } } else { + diag $call; if (scalar @warnings) { - note "The warnings were: " + diag "The warnings were: " . join(", ", @warnings); } } @@ -292,12 +993,16 @@ foreach my $test (@tests) { if (is(scalar @warnings, 1, "$this_name: Got a single warning ")) { - like($warnings[0], $message, - "$this_name: Got expected warning"); + unless (like($warnings[0], $message, + "$this_name: Got expected warning")) + { + diag $call; + } } else { + diag $call; if (scalar @warnings) { - note "The warnings were: " + diag "The warnings were: " . join(", ", @warnings); } } @@ -310,137 +1015,139 @@ foreach my $test (@tests) { undef @warnings; $ret_ref = test_utf8n_to_uvchr($bytes, $length, $disallow_flag|$UTF8_CHECK_ONLY); - is($ret_ref->[0], 0, "$this_name, CHECK_ONLY: Returns 0"); - is($ret_ref->[1], -1, - "$this_name: CHECK_ONLY: returns expected length"); + unless (is($ret_ref->[0], 0, "$this_name, CHECK_ONLY: Returns 0")) { + diag $call; + } + unless (is($ret_ref->[1], -1, + "$this_name: CHECK_ONLY: returns expected length")) + { + diag $call; + } if (! is(scalar @warnings, 0, "$this_name, CHECK_ONLY: no warnings generated")) { - note "The warnings were: " . join(", ", @warnings); + diag $call; + diag "The warnings were: " . join(", ", @warnings); } } - } - } - } - } -} -} + # Now repeat some of the above, but for + # uvchr_to_utf8_flags(). Since this comes from an + # existing code point, it hasn't overflowed. + next if $will_overflow; -# The numbers in this array are chosen because they are "interesting" on -# either ASCII or EBCDIC platforms. 0-255 require special handling on EBCDIC; -# others are the boundaries where the number of bytes required to represent -# them increase. -my @code_points = (0 .. 256, - 0x400 - 1, 0x400, - 0x800 - 1, 0x800, - 0x4000 - 1, 0x4000, - 0x8000 - 1, 0x8000, - 0xD000 - 1, 0xD000, # First code point considered - # problematic on ASCII. - 0x10000 - 1, 0x1000, - 0x200000 - 1, 0x20000, - 0x40000 - 1, 0x40000, - 0x400000 - 1, 0x400000, - 0x4000000 - 1, 0x4000000, - 0x80000000 - 1 # Highest legal on EBCDIC machines - ); -for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } - @code_points) -{ - my $hex_u = sprintf("0x%02X", $u); - my $n = utf8::unicode_to_native($u); - my $hex_n = sprintf("0x%02X", $n); - - my $offskip_should_be = (ord ("A") == 65) - ? ( $u < 0x80 ? 1 : - $u < 0x800 ? 2 : - $u < 0x10000 ? 3 : - $u < 0x200000 ? 4 : - $u < 0x4000000 ? 5 : - $u < 0x80000000 ? 6 : 7 # 13 for 64 bit words - ) - : ($u < 0xA0 ? 1 : - $u < 0x400 ? 2 : - $u < 0x4000 ? 3 : - $u < 0x40000 ? 4 : - $u < 0x400000 ? 5 : - $u < 0x4000000 ? 6 : 7 - ); - - # If this test fails, subsequent ones are meaningless. - next unless is(test_OFFUNISKIP($u), $offskip_should_be, - "Verify OFFUNISKIP($hex_u) is $offskip_should_be"); - my $invariant = $offskip_should_be == 1; - my $display_invariant = $invariant || 0; - is(test_OFFUNI_IS_INVARIANT($u), $invariant, - "Verify OFFUNI_IS_INVARIANT($hex_u) is $display_invariant"); - - my $uvchr_skip_should_be = $offskip_should_be; - next unless is(test_UVCHR_SKIP($n), $uvchr_skip_should_be, - "Verify UVCHR_SKIP($hex_n) is $uvchr_skip_should_be"); - is(test_UVCHR_IS_INVARIANT($n), $offskip_should_be == 1, - "Verify UVCHR_IS_INVARIANT($hex_n) is $display_invariant"); - - my $n_chr = chr $n; - utf8::upgrade $n_chr; - - is(test_UTF8_SKIP($n_chr), $uvchr_skip_should_be, - "Verify UTF8_SKIP(chr $hex_n) is $uvchr_skip_should_be"); - - use bytes; - for (my $j = 0; $j < length $n_chr; $j++) { - my $b = substr($n_chr, $j, 1); - my $hex_b = sprintf("\"\\x%02x\"", ord $b); - - my $byte_invariant = $j == 0 && $uvchr_skip_should_be == 1; - my $display_byte_invariant = $byte_invariant || 0; - next unless is(test_UTF8_IS_INVARIANT($b), $byte_invariant, - " Verify UTF8_IS_INVARIANT($hex_b) for byte $j " - . "is $display_byte_invariant"); - - my $is_start = $j == 0 && $uvchr_skip_should_be > 1; - my $display_is_start = $is_start || 0; - next unless is(test_UTF8_IS_START($b), $is_start, - " Verify UTF8_IS_START($hex_b) is $display_is_start"); - - my $is_continuation = $j != 0 && $uvchr_skip_should_be > 1; - my $display_is_continuation = $is_continuation || 0; - next unless is(test_UTF8_IS_CONTINUATION($b), $is_continuation, - " Verify UTF8_IS_CONTINUATION($hex_b) is " - . "$display_is_continuation"); - - my $is_continued = $uvchr_skip_should_be > 1; - my $display_is_continued = $is_continued || 0; - next unless is(test_UTF8_IS_CONTINUED($b), $is_continued, - " Verify UTF8_IS_CONTINUED($hex_b) is " - . "$display_is_continued"); + # The warning and disallow flags passed in are for + # utf8n_to_uvchr(). Convert them for + # uvchr_to_utf8_flags(). + my $uvchr_warn_flag = 0; + my $uvchr_disallow_flag = 0; + if ($warn_flag) { + if ($warn_flag == $UTF8_WARN_SURROGATE) { + $uvchr_warn_flag = $UNICODE_WARN_SURROGATE + } + elsif ($warn_flag == $UTF8_WARN_NONCHAR) { + $uvchr_warn_flag = $UNICODE_WARN_NONCHAR + } + elsif ($warn_flag == $UTF8_WARN_SUPER) { + $uvchr_warn_flag = $UNICODE_WARN_SUPER + } + elsif ($warn_flag == $UTF8_WARN_ABOVE_31_BIT) { + $uvchr_warn_flag = $UNICODE_WARN_ABOVE_31_BIT; + } + else { + fail(sprintf "Unexpected warn flag: %x", + $warn_flag); + next; + } + } + if ($disallow_flag) { + if ($disallow_flag == $UTF8_DISALLOW_SURROGATE) { + $uvchr_disallow_flag = $UNICODE_DISALLOW_SURROGATE + } + elsif ($disallow_flag == $UTF8_DISALLOW_NONCHAR) { + $uvchr_disallow_flag = $UNICODE_DISALLOW_NONCHAR + } + elsif ($disallow_flag == $UTF8_DISALLOW_SUPER) { + $uvchr_disallow_flag = $UNICODE_DISALLOW_SUPER + } + elsif ($disallow_flag == $UTF8_DISALLOW_ABOVE_31_BIT) { + $uvchr_disallow_flag = + $UNICODE_DISALLOW_ABOVE_31_BIT; + } + else { + fail(sprintf "Unexpected disallow flag: %x", + $disallow_flag); + next; + } + } - my $is_downgradeable_start = $n < 256 - && $uvchr_skip_should_be > 1 - && $j == 0; - my $display_is_downgradeable_start = $is_downgradeable_start || 0; - next unless is(test_UTF8_IS_DOWNGRADEABLE_START($b), - $is_downgradeable_start, - " Verify UTF8_IS_DOWNGRADEABLE_START($hex_b) is " - . "$display_is_downgradeable_start"); + $disallowed = $uvchr_disallow_flag; - my $is_above_latin1 = $n > 255 && $j == 0; - my $display_is_above_latin1 = $is_above_latin1 || 0; - next unless is(test_UTF8_IS_ABOVE_LATIN1($b), - $is_above_latin1, - " Verify UTF8_IS_ABOVE_LATIN1($hex_b) is " - . "$display_is_above_latin1"); + $this_name = "uvchr_to_utf8_flags() $testname: " + . (($uvchr_disallow_flag) + ? 'disallowed' + : ($disallowed) + ? 'ABOVE_31_BIT allowed' + : 'allowed'); + $this_name .= ", $eval_warn"; + $this_name .= ", " . (($uvchr_warn_flag) + ? 'with warning flag' + : 'no warning flag'); - my $is_possibly_problematic = $j == 0 - && $n >= (($isASCII) - ? 0xD000 - : 0x8000); - my $display_is_possibly_problematic = $is_possibly_problematic || 0; - next unless is(test_isUTF8_POSSIBLY_PROBLEMATIC($b), - $is_possibly_problematic, - " Verify isUTF8_POSSIBLY_PROBLEMATIC($hex_b) is " - . "$display_is_above_latin1"); + undef @warnings; + my $ret; + my $warn_flag = sprintf "0x%x", $uvchr_warn_flag; + my $disallow_flag = sprintf "0x%x", $uvchr_disallow_flag; + $call = sprintf "call was: $eval_warn; \$ret = test_uvchr_to_utf8_flags(0x%x, $warn_flag|$disallow_flag)", $allowed_uv; + $eval_text = "$eval_warn; \$ret = test_uvchr_to_utf8_flags($allowed_uv, $warn_flag|$disallow_flag)"; + eval "$eval_text"; + if (! ok ("$@ eq ''", "$this_name: eval succeeded")) { + diag "\$!='$!'; eval'd=\"$eval_text\""; + next; + } + if ($disallowed) { + unless (is($ret, undef, "$this_name: Returns undef")) { + diag $call; + } + } + else { + unless (is($ret, $bytes, "$this_name: Returns expected string")) { + diag $call; + } + } + if (! $do_warning + && ($warning eq 'utf8' || $warning eq $category)) + { + if (!is(scalar @warnings, 0, + "$this_name: No warnings generated")) + { + diag $call; + diag "The warnings were: " . join(", ", @warnings); + } + } + elsif ($uvchr_warn_flag + && ($warning eq 'utf8' || $warning eq $category)) + { + if (is(scalar @warnings, 1, + "$this_name: Got a single warning ")) + { + unless (like($warnings[0], $message, + "$this_name: Got expected warning")) + { + diag $call; + } + } + else { + diag $call; + if (scalar @warnings) { + diag "The warnings were: " + . join(", ", @warnings); + } + } + } + } + } + } } } diff --git a/ext/re/t/re_funcs_u.t b/ext/re/t/re_funcs_u.t index 781ffc9..70820df3 100644 --- a/ext/re/t/re_funcs_u.t +++ b/ext/re/t/re_funcs_u.t @@ -6,7 +6,8 @@ BEGIN { print "1..0 # Skip -- Perl configured without re module\n"; exit 0; } - require 'test.pl'; # For watchdog + require 'test.pl'; # For watchdog + require 'loc_tools.pl'; # To see if platform has locales } use strict; @@ -92,8 +93,7 @@ if ('1234'=~/(?:(?\d)|(?!))(?\d)(?\d)(?\d)/){ } SKIP: { - skip_if_miniperl("no dynamic loading on miniperl, no POSIX", 3); - skip 'No locale testing without d_setlocale', 3 if(!$Config::Config{d_setlocale}); + skip 'No locales available', 3 unless locales_enabled('LC_CTYPE'); require POSIX; my $current_locale = POSIX::setlocale( &POSIX::LC_CTYPE, 'de_DE.ISO-8859-1' ); if ( !$current_locale || $current_locale ne 'de_DE.ISO-8859-1' ) { @@ -109,8 +109,7 @@ if ('1234'=~/(?:(?\d)|(?!))(?\d)(?\d)(?\d)/){ } SKIP: { - skip_if_miniperl("no dynamic loading on miniperl, no POSIX", 3); - skip 'No locale testing without d_setlocale', 3 if(!$Config::Config{d_setlocale}); + skip 'No locales available', 3 unless locales_enabled('LC_CTYPE'); require POSIX; my $current_locale = POSIX::setlocale( &POSIX::LC_CTYPE, 'C' ); if ( !$current_locale || $current_locale ne 'C' ) { diff --git a/gv.h b/gv.h index a6b695e..e3357bc 100644 --- a/gv.h +++ b/gv.h @@ -151,6 +151,10 @@ Return the CV from the GV. #define GvENAME_HEK(gv) GvNAME_HEK(GvEGV(gv) ? GvEGV(gv) : gv) #define GvESTASH(gv) GvSTASH(GvEGV(gv) ? GvEGV(gv) : gv) +/* GVf_INTRO is one-shot flag which indicates that the next assignment + of a reference to the glob is to be localised; it distinguishes + 'local *g = $ref' from '*g = $ref'. +*/ #define GVf_INTRO 0x01 #define GVf_MULTI 0x02 #define GVf_ASSUMECV 0x04 diff --git a/hints/catamount.sh b/hints/catamount.sh index 21dc947..de7f1e9 100644 --- a/hints/catamount.sh +++ b/hints/catamount.sh @@ -31,11 +31,11 @@ # mkdir -p /opt/perl-catamount # mkdir -p /opt/perl-catamount/include # mkdir -p /opt/perl-catamount/lib -# mkdir -p /opt/perl-catamount/lib/perl5/5.23.5 +# mkdir -p /opt/perl-catamount/lib/perl5/5.23.6 # mkdir -p /opt/perl-catamount/bin # cp *.h /opt/perl-catamount/include # cp libperl.a /opt/perl-catamount/lib -# cp -pr lib/* /opt/perl-catamount/lib/perl5/5.23.5 +# cp -pr lib/* /opt/perl-catamount/lib/perl5/5.23.6 # cp miniperl perl run.sh cc.sh /opt/perl-catamount/lib # # With the headers and the libperl.a you can embed Perl to your Catamount diff --git a/hints/darwin.sh b/hints/darwin.sh index fa55b44..2af6ae7 100644 --- a/hints/darwin.sh +++ b/hints/darwin.sh @@ -187,26 +187,57 @@ case "$ld" in esac # From http://ftp.netbsd.org/pub/pkgsrc/current/pkgsrc/mk/platform/Darwin.mk +# and https://trac.macports.org/wiki/XcodeVersionInfo +# and https://trac.macports.org/wiki/UsingTheRightCompiler # # OS, Kernel, Xcode Version # Note that Xcode gets updates on older systems sometimes. # pkgsrc generally expects that the most up-to-date xcode available for # an OS version is installed # +# Note that Apple hijacks the clang preprocessor symbols __clang_major__ +# and __clang_minor__ so they cannot be used (easily) to detect the +# actual clang release. For example: +# +# "Yosemite 10.10.x 14.x.y 6.3 (clang 3.6 as 6.1/602.0.49)" +# +# means that the Xcode 6.3 provided the clang 6.3 but called it 6.1 +# (__clang_major__, __clang_minor__) and in addition the preprocessor +# symbol __apple_build_version__ was 6020049. +# # Codename OS Kernel Xcode +# # Cheetah 10.0.x 1.3.1 # Puma 10.1 1.4.1 # 10.1.x 5.x.y # Jaguar 10.2.x 6.x.y # Panther 10.3.x 7.x.y -# Tiger 10.4.x 8.x.y 2.x (gcc 4.0, 4.0.1 from 2.2) -# Leopard 10.5.x 9.x.y 3.x (gcc 4.0.1, 4.0.1 and 4.2.1 from 3.1) -# Snow Leopard 10.6.x 10.x.y 3.2+ (gcc 4.0.1 and 4.2.1) -# Lion 10.7.x 11.x.y 4.1 (llvm gcc 4.2.1) -# Mountain Lion 10.8.x 12.x.y 4.5 (llvm gcc 4.2.1) -# Mavericks 10.9.x 13.x.y 6 (llvm clang 6.0) -# Yosemite 10.10.x 14.x.y 6 (llvm clang 6.0) -# El Capitan 10.11.x 15.x.y 7 (llvm clang 7.0) +# Tiger 10.4.x 8.x.y 2.0 (gcc4 4.0.0) +# 2.2 (gcc4 4.0.1) +# 2.2.1 (gcc 3.3) +# 2.5 ? +# Leopard 10.5.x 9.x.y 3.0 (gcc 4.0.1 default) +# 3.1 (gcc 4.2.1) +# Snow Leopard 10.6.x 10.x.y 3.2 (llvm gcc 4.2, clang 2.3 as 1.0) +# 3.2.1 (clang 1.0.1 as 1.0.1/24) +# 3.2.2 (clang 1.0.2 as 1.0.2/32) +# 3.2.3 (clang 1.5 as 1.5/60) +# 4.0.1 (clang 2.9 as 2.0/138) +# Lion 10.7.x 11.x.y 4.1 (llvm gcc 4.2.1, clang 3.0 as 2.1/163.7.1) +# 4.2 (clang 3.0 as 3.0/211.10.1) +# 4.3.3 (clang 3.1 as 3.1/318.0.61) +# 4.4 (clang 3.1 as 4.0/421.0.57) +# Mountain Lion 10.8.x 12.x.y 4.5 (clang 3.1 as 4.1/421.11.65, real gcc removed, there is gcc but it's really clang) +# 4.6 (clang 3.2 as 4.2/425.0.24) +# 5.0 (clang 3.3 as 5.0/500.2.75) +# 5.1 (clang 3.4 as 5.1/503.0.38) +# Mavericks 10.9.x 13.x.y 6.0.1 (clang 3.5 as 6.0/600.0.51) +# 6.1 (clang 3.5 as 6.0/600.0.54) +# 6.2 (clang 3.5 as 6.0/600.0.57) +# Yosemite 10.10.x 14.x.y 6.3 (clang 3.6 as 6.1/602.0.49) +# El Capitan 10.11.x 15.x.y 7.0 (clang 3.7 as 7.0/700.0.72) +# 7.1 (clang 3.7 as 7.0/700.1.76) +# # Processors Supported # @@ -459,3 +490,13 @@ esac # makefile in the same place. Since Darwin uses GNU make, this dodges # the problem. firstmakefile=GNUmakefile; + +# Parts of the system call setenv(), in particular in an atfork handler. +# This causes problems when the child tries to clean up environ[], so +# let libc manage environ[]. +cat >> config.over <<'EOOVER' +if test "$d_unsetenv" = "$define" -a \ + `expr "$ccflags" : '.*-DPERL_USE_SAFE_PUTENV'` -eq 0; then + ccflags="$ccflags -DPERL_USE_SAFE_PUTENV" +fi +EOOVER diff --git a/hints/solaris_2.sh b/hints/solaris_2.sh index 2f700d5..ba368e6 100644 --- a/hints/solaris_2.sh +++ b/hints/solaris_2.sh @@ -723,3 +723,9 @@ case "$cc" in ;; esac +# Oracle/Sun builds their Perl shared since 5.6.1, and they also +# strongly recommend using shared libraries in general. +# +# Furthermore, OpenIndiana seems to effectively require building perl +# shared, or otherwise perl scripts won't even find the Perl library. +useshrplib='true' diff --git a/installhtml b/installhtml index 3b1eda8..06342a8 100644 --- a/installhtml +++ b/installhtml @@ -500,12 +500,12 @@ sub installdir { no_upwards($_) or next; my $is_dir = -d "$podroot/$dir/$_"; next if $is_dir and not $recurse; - my $target = ( - $is_dir ? \@dirlist : - s/\.pod$// ? \@podlist : - s/\.pm$// ? \@pmlist : - undef - ); + my $target + = $is_dir ? \@dirlist + : s/\.pod$// ? \@podlist + : s/\.pm$// ? \@pmlist + : undef + ; push @$target, "$dir/$_" if $target; } diff --git a/intrpvar.h b/intrpvar.h index 7f9fa92..442f9fb 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -191,7 +191,7 @@ PERLVAR(I, statgv, GV *) PERLVARI(I, statname, SV *, NULL) #ifdef HAS_TIMES -/* Will be removed soon after v5.23.5. See RT #121351 */ +/* Will be removed soon after v5.23.6. See RT #121351 */ PERLVAR(I, timesbuf, struct tms) #endif @@ -766,7 +766,7 @@ PERLVARI(I, globhook, globhook_t, NULL) PERLVARI(I, padlist_generation, U32, 1) /* id to identify padlist clones */ -/* The last unconditional member of the interpreter structure when 5.23.5 was +/* The last unconditional member of the interpreter structure when 5.23.6 was released. The offset of the end of this is baked into a global variable in any shared perl library which will allow a sanity test in future perl releases. */ diff --git a/lib/B/Deparse.pm b/lib/B/Deparse.pm index d4c6f60..7d9ad2b 100644 --- a/lib/B/Deparse.pm +++ b/lib/B/Deparse.pm @@ -46,7 +46,7 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring MDEREF_SHIFT ); -$VERSION = '1.35'; +$VERSION = '1.36'; use strict; use vars qw/$AUTOLOAD/; use warnings (); @@ -2642,10 +2642,11 @@ sub pp_readline { my $self = shift; my($op, $cx) = @_; my $kid = $op->first; - if (is_scalar($kid)) { - my $kid_deparsed = $self->deparse($kid, 1); - return '<<>>' if $op->flags & OPf_SPECIAL and $kid_deparsed eq 'ARGV'; - return "<$kid_deparsed>"; + if (is_scalar($kid) + and $op->flags & OPf_SPECIAL + and $self->deparse($kid, 1) eq 'ARGV') + { + return '<<>>'; } return $self->unop($op, $cx, "readline"); } @@ -3221,19 +3222,10 @@ sub pp_glob { my $kid = $op->first->sibling; # skip pushmark my $keyword = $op->flags & OPf_SPECIAL ? 'glob' : $self->keyword('glob'); - my $text; - if ($keyword =~ /^CORE::/ - or $kid->name ne 'const' - or ($text = $self->dq($kid)) - =~ /^\$?(\w|::|\`)+$/ # could look like a readline - or $text =~ /[<>]/) { - $text = $self->deparse($kid); - return $cx >= 5 || $self->{'parens'} - ? "$keyword($text)" - : "$keyword $text"; - } else { - return '<' . $text . '>'; - } + my $text = $self->deparse($kid); + return $cx >= 5 || $self->{'parens'} + ? "$keyword($text)" + : "$keyword $text"; } # Truncate is special because OPf_SPECIAL makes a bareword first arg diff --git a/lib/B/Deparse.t b/lib/B/Deparse.t index 62c0a4b..704b31e 100644 --- a/lib/B/Deparse.t +++ b/lib/B/Deparse.t @@ -128,7 +128,7 @@ $b = quotemeta <<'EOF'; BEGIN { $^I = ".bak"; } BEGIN { $^W = 1; } BEGIN { $/ = "\n"; $\ = "\n"; } -LINE: while (defined($_ = )) { +LINE: while (defined($_ = readline ARGV)) { chomp $_; our(@F) = split(' ', $_, 0); '???'; @@ -628,12 +628,27 @@ local our($rhu, $barb); #### # <> my $foo; -$_ .= . <$foo>; +$_ .= <> . . <$foo>; +<$foo>; +<${foo}>; +<$ foo>; +>>>> +my $foo; +$_ .= readline(ARGV) . readline(ARGV) . readline($foo); +readline $foo; +glob $foo; +glob $foo; #### # readline readline 'FH'; readline *$_; +readline *{$_}; +readline ${"a"}; +>>>> +readline 'FH'; +readline *$_; readline *{$_;}; +readline ${'a';}; #### # <<>> $_ = <<>>; @@ -1363,6 +1378,10 @@ tr/a/b/r + $a =~ tr/p/q/r; #### # [perl #90898] ; +glob 'a,'; +>>>> +glob 'a,'; +glob 'a,'; #### # [perl #91008] # SKIP ?$] >= 5.023 && "autoderef deleted in this Perl version" diff --git a/lib/B/Op_private.pm b/lib/B/Op_private.pm index 2a90973..310b9da 100644 --- a/lib/B/Op_private.pm +++ b/lib/B/Op_private.pm @@ -118,7 +118,7 @@ package B::Op_private; our %bits; -our $VERSION = "5.023005"; +our $VERSION = "5.023006"; $bits{$_}{3} = 'OPpENTERSUB_AMPER' for qw(entersub rv2cv); $bits{$_}{6} = 'OPpENTERSUB_DB' for qw(entersub rv2cv); diff --git a/lib/Benchmark.pm b/lib/Benchmark.pm index b301678..90e54a6 100644 --- a/lib/Benchmark.pm +++ b/lib/Benchmark.pm @@ -482,7 +482,7 @@ our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION); clearcache clearallcache disablecache enablecache); %EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ; -$VERSION = 1.20; +$VERSION = 1.21; # --- ':hireswallclock' special handling @@ -585,6 +585,17 @@ sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; } sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; } sub iters { $_[0]->[5] ; } +# return the sum of various times: which ones depending on $style + +sub elapsed { + my ($self, $style) = @_; + $style = "" unless defined $style; + + return $self->cpu_c if $style eq 'nop'; + return $self->cpu_p if $style eq 'noc'; + return $self->cpu_a; +} + $_Usage{timediff} = <<'USAGE'; usage: $result_diff = timediff($result1, $result2); @@ -647,11 +658,7 @@ sub timestr { $r,$pu,$ps,$pt) if $style eq 'noc'; $s = sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)", $r,$cu,$cs,$ct) if $style eq 'nop'; - my $elapsed = do { - if ($style eq 'nop') {$cu+$cs} - elsif ($style eq 'noc') {$pu+$ps} - else {$cu+$cs+$pu+$ps} - }; + my $elapsed = $tr->elapsed($style); $s .= sprintf(" @ %$f/s (n=$n)",$n/($elapsed)) if $n && $elapsed; $s; } @@ -897,6 +904,16 @@ sub timethis{ $n = $forn if defined $forn; + if ($t->elapsed($style) < 0) { + # due to clock granularity and variable CPU speed and load, + # on quick code with a small number of loops, it's possible for + # the empty loop to appear to take longer than the real loop + # (e.g. 1 tick verses 0 ticks). This leads to a negative elapsed + # time. In this case, floor it at zero, to stop bizarre results. + print " (warning: too few iterations for a reliable count)\n"; + $t->[$_] = 0 for 1..4; + } + # A conservative warning to spot very silly tests. # Don't assume that your benchmark is ok simply because # you don't get this warning! @@ -973,13 +990,11 @@ sub cmpthese{ my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results; for (@vals) { + # recreate the pre-flattened Benchmark object + my $tmp_bm = bless [ @{$_}[1..$#$_] ]; + my $elapsed = $tmp_bm->elapsed($style); # The epsilon fudge here is to prevent div by 0. Since clock # resolutions are much larger, it's below the noise floor. - my $elapsed = do { - if ($style eq 'nop') {$_->[4]+$_->[5]} - elsif ($style eq 'noc') {$_->[2]+$_->[3]} - else {$_->[2]+$_->[3]+$_->[4]+$_->[5]} - }; my $rate = $_->[6]/(($elapsed)+0.000000000000001); $_->[7] = $rate; } diff --git a/lib/Benchmark.t b/lib/Benchmark.t index 7706bba..4bf01b2 100644 --- a/lib/Benchmark.t +++ b/lib/Benchmark.t @@ -8,7 +8,7 @@ BEGIN { use warnings; use strict; use vars qw($foo $bar $baz $ballast); -use Test::More tests => 195; +use Test::More tests => 213; use Benchmark qw(:all); @@ -69,7 +69,7 @@ isa_ok($threesecs, 'Benchmark', "countit 0, CODEREF"); isnt ($baz, 0, "benchmarked code was run"); my $in_threesecs = $threesecs->iters; print "# in_threesecs=$in_threesecs iterations\n"; -ok ($in_threesecs > 0, "iters returned positive iterations"); +cmp_ok($in_threesecs, '>', 0, "iters returned positive iterations"); my $cpu3 = $threesecs->[1]; # user my $sys3 = $threesecs->[2]; # sys cmp_ok($cpu3+$sys3, '>=', 3.0, "3s cpu3 is at least 3s"); @@ -85,7 +85,7 @@ isa_ok($onesec, 'Benchmark', "countit 1, CODEREF"); isnt ($baz, 0, "benchmarked code was run"); my $in_onesec = $onesec->iters; print "# in_onesec=$in_onesec iterations\n"; -ok ($in_onesec > 0, "iters returned positive iterations"); +cmp_ok($in_onesec, '>', 0, "iters returned positive iterations"); my $cpu1 = $onesec->[1]; # user my $sys1 = $onesec->[2]; # sys cmp_ok($cpu1+$sys1, '>=', 1.0, "is cpu1 is at least 1s"); @@ -102,7 +102,7 @@ isa_ok($onesec, 'Benchmark', "countit 1, eval"); isnt ($baz, 0, "benchmarked code was run"); my $in_again = $again->iters; print "# $in_again iterations\n"; -ok ($in_again > 0, "iters returned positive iterations"); +cmp_ok($in_again, '>', 0, "iters returned positive iterations"); my $t1 = new Benchmark; @@ -144,7 +144,7 @@ is ($auto, $default, 'timestr ($diff, "auto") matches timestr ($diff)'); my $out = tie *OUT, 'TieOut'; -my $iterations = 3; +my $iterations = 100; $foo = 0; select(OUT); @@ -201,8 +201,8 @@ like ($got, $Nop_Pattern, 'specify format as nop'); select(STDOUT); isa_ok($got, 'Benchmark', "timethis, at least 2 seconds with format 'none'"); - ok ($foo > 0, "benchmarked code was run"); - ok ($end - $start > 1, "benchmarked code ran for over 1 second"); + cmp_ok($foo, '>', 0, "benchmarked code was run"); + cmp_ok($end - $start, '>', 1, "benchmarked code ran for over 1 second"); $got = $out->read(); # Remove any warnings about having too few iterations. @@ -288,10 +288,10 @@ my $results; isa_ok($results->{Foo}, 'Benchmark', "Foo value"); isa_ok($results->{Bar}, 'Benchmark', "Bar value"); eq_set([keys %$results], [qw(Foo Bar)], 'should be exactly two objects'); - ok ($foo > 0, "Foo code was run"); - ok ($bar > 0, "Bar code was run"); + cmp_ok($foo, '>', 0, "Foo code was run"); + cmp_ok($bar, '>', 0, "Bar code was run"); - ok (($end - $start) > 0.1, "benchmarked code ran for over 0.1 seconds"); + cmp_ok($end-$start, '>', 0.1, "benchmarked code ran for over 0.1 seconds"); $got = $out->read(); # Remove any warnings about having too few iterations. @@ -347,11 +347,10 @@ sub check_graph_consistency { pass ("slow rate is less than fast rate"); unless (ok ($slowfast <= 0 && $slowfast >= -100, "slowfast should be less than or equal to zero, and >= -100")) { - print STDERR "# slowfast $slowfast\n"; + diag("slowfast=$slowfast"); $all_passed = 0; } - unless (ok ($fastslow > 0, "fastslow should be > 0")) { - print STDERR "# fastslow $fastslow\n"; + unless (cmp_ok($fastslow, '>', 0, "fastslow should be > 0")) { $all_passed = 0; } } else { @@ -384,8 +383,7 @@ sub check_graph_vs_output { [$fastr, $fastratet, $fastslowt, $fastfast]], "check the chart layout matches the formatted output"); unless ($all_passed) { - print STDERR "# Something went wrong there. I got this chart:\n"; - print STDERR "# $_\n" foreach split /\n/, $got; + diag("Something went wrong there. I got this chart:\n$got"); } } @@ -401,12 +399,13 @@ sub check_graph { { select(OUT); my $start = times; - my $chart = cmpthese( -0.1, { a => "\$i = sqrt(\$i++) * sqrt(\$i)", + my $chart = cmpthese( -0.1, { a => "\$i = sqrt(\$i++) * sqrt(\$i) for 1..10", b => "\$i = sqrt(\$i++)", }, "auto" ) ; my $end = times; select(STDOUT); - ok (($end - $start) > 0.05, "benchmarked code ran for over 0.05 seconds"); + cmp_ok($end - $start, '>', 0.05, + "benchmarked code ran for over 0.05 seconds"); $got = $out->read(); # Remove any warnings about having too few iterations. @@ -425,11 +424,12 @@ sub check_graph { { select(OUT); my $start = times; - my $chart = cmpthese( -0.1, { a => "\$i = sqrt(\$i++) * sqrt(\$i)", + my $chart = cmpthese( -0.1, { a => "\$i = sqrt(\$i++) * sqrt(\$i) for 1..10", b => "\$i = sqrt(\$i++)" }); my $end = times; select(STDOUT); - ok (($end - $start) > 0.05, "benchmarked code ran for over 0.05 seconds"); + cmp_ok($end - $start, '>', 0.05, + "benchmarked code ran for over 0.05 seconds"); $got = $out->read(); # Remove any warnings about having too few iterations. @@ -447,15 +447,15 @@ sub check_graph { { $foo = $bar = 0; select(OUT); - my $chart = cmpthese( 10, $code_to_test, 'nop' ) ; + my $chart = cmpthese($iterations, $code_to_test, 'nop' ) ; select(STDOUT); - ok ($foo > 0, "Foo code was run"); - ok ($bar > 0, "Bar code was run"); + cmp_ok($foo, '>', 0, "Foo code was run"); + cmp_ok($bar, '>', 0, "Bar code was run"); $got = $out->read(); # Remove any warnings about having too few iterations. $got =~ s/\(warning:[^\)]+\)//gs; - like ($got, qr/timing 10 iterations of\s+Bar\W+Foo\W*?\.\.\./s, + like ($got, qr/timing $iterations iterations of\s+Bar\W+Foo\W*?\.\.\./s, 'check title'); # Remove the title $got =~ s/.*\.\.\.//s; @@ -467,10 +467,10 @@ sub check_graph { { $foo = $bar = 0; select(OUT); - my $chart = cmpthese( 10, $code_to_test, 'none' ) ; + my $chart = cmpthese($iterations, $code_to_test, 'none' ) ; select(STDOUT); - ok ($foo > 0, "Foo code was run"); - ok ($bar > 0, "Bar code was run"); + cmp_ok($foo, '>', 0, "Foo code was run"); + cmp_ok($bar, '>', 0, "Bar code was run"); $got = $out->read(); # Remove any warnings about having too few iterations. @@ -484,6 +484,32 @@ sub check_graph { check_graph (@$chart); } +# this is a repeat of the above test, but with the timing and charting +# steps split. + +{ + $foo = $bar = 0; + select(OUT); + my $res = timethese($iterations, $code_to_test, 'none' ) ; + my $chart = cmpthese($res, 'none' ) ; + select(STDOUT); + cmp_ok($foo, '>', 0, "Foo code was run"); + cmp_ok($bar, '>', 0, "Bar code was run"); + + $got = $out->read(); + # Remove any warnings about having too few iterations. + $got =~ s/\(warning:[^\)]+\)//gs; + $got =~ s/^[ \t\n]+//s; # Remove all the whitespace from the beginning + is ($got, '', "format 'none' should suppress output"); + is (ref $chart, 'ARRAY', "output should be an array ref"); + # Some of these will go bang if the preceding test fails. There will be + # a big clue as to why, from the previous test's diagnostic + is (ref $chart->[0], 'ARRAY', "output should be an array of arrays"); + use Data::Dumper; + check_graph(@$chart) + or diag(Data::Dumper->Dump([$res, $chart], ['$res', '$chart'])); +} + { $foo = $bar = 0; select(OUT); @@ -493,7 +519,7 @@ sub check_graph { is ($bar, 0, "Bar code was not run"); $got = $out->read(); - ok ($got !~ /\.\.\./s, 'check that there is no title'); + unlike($got, qr/\.\.\./s, 'check that there is no title'); like ($got, $graph_dissassembly, "Should find the output graph somewhere"); check_graph_vs_output ($chart, $got); } diff --git a/lib/locale.pm b/lib/locale.pm index fb91f0a..e2317ca 100644 --- a/lib/locale.pm +++ b/lib/locale.pm @@ -1,6 +1,6 @@ package locale; -our $VERSION = '1.07'; +our $VERSION = '1.08'; use Config; $Carp::Internal{ (__PACKAGE__) } = 1; @@ -97,15 +97,14 @@ sub import { $arg =~ s/^://; eval { require POSIX; import POSIX 'locale_h'; }; - unless (defined &POSIX::LC_CTYPE) { - return; - } # Map our names to the ones defined by POSIX my $LC = "LC_" . uc($arg); my $bit = eval "&POSIX::$LC"; - if (defined $bit) { + if (defined $bit) { # XXX Should we warn that this category isn't + # supported on this platform, or make it + # always be the C locale? # Verify our assumption. if (! ($bit >= 0 && $bit < 31)) { diff --git a/lib/meta_notation.pm b/lib/meta_notation.pm index 2f85cd3..eea8587 100644 --- a/lib/meta_notation.pm +++ b/lib/meta_notation.pm @@ -4,7 +4,7 @@ use warnings; # A tiny private library routine which is a helper to several Perl core # modules, to allow a paradigm to be implemented in a single place. The name, # contents, or even the existence of this file may be changed at any time and -# are NOT to be used by anthing outside the Perl core. +# are NOT to be used by anything outside the Perl core. sub _meta_notation ($) { @@ -22,10 +22,8 @@ sub _meta_notation ($) { # On ASCII platforms, the upper-Latin1-range characters are converted to # Meta notation, so that \xC1 becomes 'M-A', \xE2 becomes 'M-b', etc. # This is how it always has worked, so is continued that way for backwards - # compatibility. XXX Wrong, but the way it has always worked is that \x80 - # .. \x9F are converted to M- followed by a literal control char. This - # probably has escaped attention due to the limited domains this code has - # been applied to. ext/SDBM_File/dbu.c does this right. + # compatibility. The range \x80 .. \x9F becomes M-^@ .. M-^A, M-^B, ... + # M-^Z, M-^[, M-^\, M-^], M-^, M-^_ # # On EBCDIC platforms, the upper-Latin1-range characters are converted # into '\x{...}' Meta notation doesn't make sense on EBCDIC platforms @@ -40,7 +38,8 @@ sub _meta_notation ($) { sprintf("^%c",utf8::unicode_to_native(ord($1)^64))/xeg; $string =~ s/\c?/^?/g; if (ord("A") == 65) { - $string =~ s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg; + $string =~ s/([\200-\237])/sprintf("M-^%c",(ord($1)&0177)^64)/eg; + $string =~ s/([\240-\377])/sprintf("M-%c" ,ord($1)&0177)/eg; } else { no warnings 'experimental::regex_sets'; diff --git a/lib/meta_notation.t b/lib/meta_notation.t index d89d50c..b687dbd 100644 --- a/lib/meta_notation.t +++ b/lib/meta_notation.t @@ -15,9 +15,17 @@ if ($@) { } else { - is(_meta_notation("\007\010\011\c?Z\x{103}"), "^G^H^I^?Z\x{103}"); + is(_meta_notation("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C"), + "^@^A^B^C^D^E^F^G^H^I^J^K^L"); + is(_meta_notation("\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19"), + "^M^N^O^P^Q^R^S^T^U^V^W^X^Y"); + is(_meta_notation("\x1A\x1B\x1C\x1D\x1E\x1F\c?"), + "^Z^[^\\^]^^^_^?"); + is(_meta_notation("09%AZaz\x{103}"), "09%AZaz\x{103}"); if ($::IS_ASCII || $::IS_ASCII) { + is(_meta_notation("\x7f\x80\x81\x82\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1"), + '^?M-^@M-^AM-^BM-^ZM-^[M-^\\M-^]M-^^M-^_M- M-!'); is(_meta_notation("\x{c1}\x{e2}"), 'M-AM-b'); is(_meta_notation("\x{df}"), 'M-_'); } diff --git a/lib/perl5db.pl b/lib/perl5db.pl index 0d240ae..68f7e50 100644 --- a/lib/perl5db.pl +++ b/lib/perl5db.pl @@ -528,7 +528,7 @@ BEGIN { # Debugger for Perl 5.00x; perl5db.pl patch level: use vars qw($VERSION $header); -$VERSION = '1.49_01'; +$VERSION = '1.49_02'; $header = "perl5db.pl version $VERSION"; @@ -2490,7 +2490,11 @@ EOP # 'm' is method. # 'v' is the value (i.e: method name or subroutine ref). # 's' is subroutine. -my %cmd_lookup = +my %cmd_lookup; + +BEGIN +{ + %cmd_lookup = ( '-' => { t => 'm', v => '_handle_dash_command', }, '.' => { t => 's', v => \&_DB__handle_dot_command, }, @@ -2523,6 +2527,7 @@ my %cmd_lookup = (map { $_ => {t => 'm', v => '_handle_cmd_wrapper_commands' }, } qw(a A b B e E h i l L M o O v w W)), ); +}; sub DB { diff --git a/lib/perl5db.t b/lib/perl5db.t index 98a3686..0c4fc42 100644 --- a/lib/perl5db.t +++ b/lib/perl5db.t @@ -29,7 +29,7 @@ BEGIN { $ENV{PERL_RL} = 'Perl'; # Suppress system Term::ReadLine::Gnu } -plan(121); +plan(123); my $rc_filename = '.perldb'; @@ -2799,6 +2799,22 @@ SKIP: ); } +{ + # perl 5 RT #126735 regression bug. + local $ENV{PERLDB_OPTS} = "NonStop=0 RemotePort=non-existent-host.tld:9001"; + my $output = runperl( stdin => "q\n", stderr => 1, switches => [ '-d' ], prog => '../lib/perl5db/t/fact' ); + like( + $output, + qr/^Unable to connect to remote host:/ms, + 'Tried to connect.', + ); + unlike( + $output, + qr/syntax error/, + 'Can quit from the debugger after a wrong RemotePort', + ); +} + END { 1 while unlink ($rc_filename, $out_fn); } diff --git a/lib/strict.pm b/lib/strict.pm index 03ed21c..93f2122 100644 --- a/lib/strict.pm +++ b/lib/strict.pm @@ -1,6 +1,6 @@ package strict; -$strict::VERSION = "1.09"; +$strict::VERSION = "1.10"; # Verify that we're called correctly so that strictures will work. unless ( __FILE__ =~ /(^|[\/\\])\Q${\__PACKAGE__}\E\.pmc?$/ ) { @@ -94,6 +94,10 @@ strict - Perl pragma to restrict unsafe constructs =head1 DESCRIPTION +The C pragma disables certain Perl expressions that could behave +unexpectedly or are difficult to debug, turning them into errors. The +effect of this pragma is limited to the current file or scope block. + If no import list is supplied, all possible restrictions are assumed. (This is the safest mode to operate in, but is sometimes too strict for casual programming.) Currently, there are three possible things to be diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 5711791..be66780 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -3100,7 +3100,7 @@ END # Not currently used, not fully tested. # sub peek { -# # Non-destructive look-ahead one non-adjusted, non-comment, non-blank +# # Non-destructive lookahead one non-adjusted, non-comment, non-blank # # record. Not callable from an each_line_handler(), nor does it call # # an each_line_handler() on the line. # @@ -19332,10 +19332,10 @@ if (! $rebuild) { } } if (! $rebuild) { - print "Files seem to be ok, not bothering to rebuild. Add '-w' option to force build\n"; + print "$0: Files seem to be ok, not bothering to rebuild. Add '-w' option to force build\n"; exit(0); } -print "Must rebuild tables.\n" if $verbosity >= $VERBOSE; +print "$0: Must rebuild tables.\n" if $verbosity >= $VERBOSE; # Ready to do the major processing. First create the perl pseudo-property. $perl = Property->new('perl', Type => $NON_STRING, Perl_Extension => 1); @@ -19544,11 +19544,16 @@ utf8::upgrade($breakable_utf8); my $nobreak_utf8 = my $nobreak = chr(utf8::unicode_to_native(0xD7)); utf8::upgrade($nobreak_utf8); -use Config; +my $are_ctype_locales_available; my $utf8_locale; chdir 't' if -d 't'; eval { require "./loc_tools.pl" }; -$utf8_locale = &find_utf8_ctype_locale if defined &find_utf8_ctype_locale; +if (defined &locales_enabled) { + $are_ctype_locales_available = locales_enabled('LC_CTYPE'); + if ($are_ctype_locales_available) { + $utf8_locale = &find_utf8_ctype_locale; + } +} sub _test_break($$) { # Test various break property matches. The 2nd parameter gives the @@ -19628,9 +19633,13 @@ sub _test_break($$) { $display_upgrade = " (utf8-upgraded)"; } - # The /l modifier has C after it to indicate the locale to try - my @modifiers = qw(a aa d lC u i); - push @modifiers, "l$utf8_locale" if defined $utf8_locale; + my @modifiers = qw(a aa d u i); + if ($are_ctype_locales_available) { + push @modifiers, "l" if defined &find_utf8_ctype_locale; + + # The /l modifier has C after it to indicate the locale to try + push @modifiers, "lC"; + } # Test for each of the regex modifiers. for my $modifier (@modifiers) { @@ -19640,13 +19649,7 @@ sub _test_break($$) { if ($modifier =~ / ^ l (.*) /x) { my $locale = $1; $display_locale = "(locale = $locale)"; - use Config; - if (defined $Config{d_setlocale}) { - eval { require POSIX; import POSIX 'locale_h'; }; - if (defined &POSIX::LC_CTYPE) { - POSIX::setlocale(&POSIX::LC_CTYPE, $locale); - } - } + POSIX::setlocale(&POSIX::LC_CTYPE, $locale); $modifier = 'l'; } diff --git a/lib/utf8.pm b/lib/utf8.pm index 23fbfac..1177841 100644 --- a/lib/utf8.pm +++ b/lib/utf8.pm @@ -2,7 +2,7 @@ package utf8; $utf8::hint_bits = 0x00800000; -our $VERSION = '1.17'; +our $VERSION = '1.18'; sub import { $^H |= $utf8::hint_bits; diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 0d2e662..66c968a 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -20,7 +20,7 @@ sub _loose_name ($) { # out blanks, underscores and dashes. The complication stems from the # grandfathered-in 'L_', which retains a single trailing underscore. - (my $loose = $_[0]) =~ tr/-_ \t//d; + (my $loose = $_[0]) =~ s/[-_ \t]//g; return $loose if $loose !~ / ^ (?: is | to )? l $/x; return 'l_' if $_[0] =~ / l .* _ /x; # If original had a trailing '_' @@ -62,7 +62,7 @@ sub _loose_name ($) { ## op.c:pmtrans -- for tr/// and y/// ## regexec.c:regclass_swash -- for /[]/, \p, and \P ## utf8.c:is_utf8_common -- for common Unicode properties - ## utf8.c:to_utf8_case -- for lc, uc, ucfirst, etc. and //i + ## utf8.c:S__to_utf8_case -- for lc, uc, ucfirst, etc. and //i ## Unicode::UCD::prop_invlist ## Unicode::UCD::prop_invmap ## diff --git a/make_ext.pl b/make_ext.pl index 223f67e..0745049 100644 --- a/make_ext.pl +++ b/make_ext.pl @@ -616,7 +616,7 @@ sub just_pm_to_blib { my ($first) = $mname =~ /^([^:]+)/; my $pm_to_blib = IS_VMS ? 'pm_to_blib.ts' : 'pm_to_blib'; - my $silent = defined $ENV{MAKEFLAGS} and $ENV{MAKEFLAGS} =~ /\b(s|silent|quiet)\b/; + my $silent = defined $ENV{MAKEFLAGS} && $ENV{MAKEFLAGS} =~ /\b(s|silent|quiet)\b/; foreach my $leaf (<*>) { if (-d $leaf) { diff --git a/mg.c b/mg.c index 0f1c314..f8d8f33 100644 --- a/mg.c +++ b/mg.c @@ -2568,13 +2568,92 @@ Perl_magic_setlvref(pTHX_ SV *sv, MAGIC *mg) return 0; } +static void +S_set_dollarzero(pTHX_ SV *sv) + PERL_TSA_REQUIRES(PL_dollarzero_mutex) +{ +#ifdef USE_ITHREADS + dVAR; +#endif + const char *s; + STRLEN len; + I32 i; +#ifdef HAS_SETPROCTITLE + /* The BSDs don't show the argv[] in ps(1) output, they + * show a string from the process struct and provide + * the setproctitle() routine to manipulate that. */ + if (PL_origalen != 1) { + s = SvPV_const(sv, len); +# if __FreeBSD_version > 410001 + /* The leading "-" removes the "perl: " prefix, + * but not the "(perl) suffix from the ps(1) + * output, because that's what ps(1) shows if the + * argv[] is modified. */ + setproctitle("-%s", s); +# else /* old FreeBSDs, NetBSD, OpenBSD, anyBSD */ + /* This doesn't really work if you assume that + * $0 = 'foobar'; will wipe out 'perl' from the $0 + * because in ps(1) output the result will be like + * sprintf("perl: %s (perl)", s) + * I guess this is a security feature: + * one (a user process) cannot get rid of the original name. + * --jhi */ + setproctitle("%s", s); +# endif + } +#elif defined(__hpux) && defined(PSTAT_SETCMD) + if (PL_origalen != 1) { + union pstun un; + s = SvPV_const(sv, len); + un.pst_command = (char *)s; + pstat(PSTAT_SETCMD, un, len, 0, 0); + } +#else + if (PL_origalen > 1) { + /* PL_origalen is set in perl_parse(). */ + s = SvPV_force(sv,len); + if (len >= (STRLEN)PL_origalen-1) { + /* Longer than original, will be truncated. We assume that + * PL_origalen bytes are available. */ + Copy(s, PL_origargv[0], PL_origalen-1, char); + } + else { + /* Shorter than original, will be padded. */ +#ifdef PERL_DARWIN + /* Special case for Mac OS X: see [perl #38868] */ + const int pad = 0; +#else + /* Is the space counterintuitive? Yes. + * (You were expecting \0?) + * Does it work? Seems to. (In Linux 2.4.20 at least.) + * --jhi */ + const int pad = ' '; +#endif + Copy(s, PL_origargv[0], len, char); + PL_origargv[0][len] = 0; + memset(PL_origargv[0] + len + 1, + pad, PL_origalen - len - 1); + } + PL_origargv[0][PL_origalen-1] = 0; + for (i = 1; i < PL_origargc; i++) + PL_origargv[i] = 0; +#ifdef HAS_PRCTL_SET_NAME + /* Set the legacy process name in addition to the POSIX name on Linux */ + if (prctl(PR_SET_NAME, (unsigned long)s, 0, 0, 0) != 0) { + /* diag_listed_as: SKIPME */ + Perl_croak(aTHX_ "Can't set $0 with prctl(): %s", Strerror(errno)); + } +#endif + } +#endif +} + int Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) { #ifdef USE_ITHREADS dVAR; #endif - const char *s; I32 paren; const REGEXP * rx; I32 i; @@ -2626,10 +2705,12 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) case '\004': /* ^D */ #ifdef DEBUGGING - s = SvPV_nolen_const(sv); - PL_debug = get_debug_opts(&s, 0) | DEBUG_TOP_FLAG; - if (DEBUG_x_TEST || DEBUG_B_TEST) - dump_all_perl(!DEBUG_B_TEST); + { + const char *s = SvPV_nolen_const(sv); + PL_debug = get_debug_opts(&s, 0) | DEBUG_TOP_FLAG; + if (DEBUG_x_TEST || DEBUG_B_TEST) + dump_all_perl(!DEBUG_B_TEST); + } #else PL_debug = (SvIV(sv)) | DEBUG_TOP_FLAG; #endif @@ -2823,12 +2904,12 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) break; case '^': Safefree(IoTOP_NAME(GvIOp(PL_defoutgv))); - s = IoTOP_NAME(GvIOp(PL_defoutgv)) = savesvpv(sv); + IoTOP_NAME(GvIOp(PL_defoutgv)) = savesvpv(sv); IoTOP_GV(GvIOp(PL_defoutgv)) = gv_fetchsv(sv, GV_ADD, SVt_PVIO); break; case '~': Safefree(IoFMT_NAME(GvIOp(PL_defoutgv))); - s = IoFMT_NAME(GvIOp(PL_defoutgv)) = savesvpv(sv); + IoFMT_NAME(GvIOp(PL_defoutgv)) = savesvpv(sv); IoFMT_GV(GvIOp(PL_defoutgv)) = gv_fetchsv(sv, GV_ADD, SVt_PVIO); break; case '=': @@ -3123,74 +3204,7 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) break; case '0': LOCK_DOLLARZERO_MUTEX; -#ifdef HAS_SETPROCTITLE - /* The BSDs don't show the argv[] in ps(1) output, they - * show a string from the process struct and provide - * the setproctitle() routine to manipulate that. */ - if (PL_origalen != 1) { - s = SvPV_const(sv, len); -# if __FreeBSD_version > 410001 - /* The leading "-" removes the "perl: " prefix, - * but not the "(perl) suffix from the ps(1) - * output, because that's what ps(1) shows if the - * argv[] is modified. */ - setproctitle("-%s", s); -# else /* old FreeBSDs, NetBSD, OpenBSD, anyBSD */ - /* This doesn't really work if you assume that - * $0 = 'foobar'; will wipe out 'perl' from the $0 - * because in ps(1) output the result will be like - * sprintf("perl: %s (perl)", s) - * I guess this is a security feature: - * one (a user process) cannot get rid of the original name. - * --jhi */ - setproctitle("%s", s); -# endif - } -#elif defined(__hpux) && defined(PSTAT_SETCMD) - if (PL_origalen != 1) { - union pstun un; - s = SvPV_const(sv, len); - un.pst_command = (char *)s; - pstat(PSTAT_SETCMD, un, len, 0, 0); - } -#else - if (PL_origalen > 1) { - /* PL_origalen is set in perl_parse(). */ - s = SvPV_force(sv,len); - if (len >= (STRLEN)PL_origalen-1) { - /* Longer than original, will be truncated. We assume that - * PL_origalen bytes are available. */ - Copy(s, PL_origargv[0], PL_origalen-1, char); - } - else { - /* Shorter than original, will be padded. */ -#ifdef PERL_DARWIN - /* Special case for Mac OS X: see [perl #38868] */ - const int pad = 0; -#else - /* Is the space counterintuitive? Yes. - * (You were expecting \0?) - * Does it work? Seems to. (In Linux 2.4.20 at least.) - * --jhi */ - const int pad = ' '; -#endif - Copy(s, PL_origargv[0], len, char); - PL_origargv[0][len] = 0; - memset(PL_origargv[0] + len + 1, - pad, PL_origalen - len - 1); - } - PL_origargv[0][PL_origalen-1] = 0; - for (i = 1; i < PL_origargc; i++) - PL_origargv[i] = 0; -#ifdef HAS_PRCTL_SET_NAME - /* Set the legacy process name in addition to the POSIX name on Linux */ - if (prctl(PR_SET_NAME, (unsigned long)s, 0, 0, 0) != 0) { - /* diag_listed_as: SKIPME */ - Perl_croak(aTHX_ "Can't set $0 with prctl(): %s", Strerror(errno)); - } -#endif - } -#endif + S_set_dollarzero(aTHX_ sv); UNLOCK_DOLLARZERO_MUTEX; break; } diff --git a/numeric.c b/numeric.c index 90b586d..f1de219 100644 --- a/numeric.c +++ b/numeric.c @@ -1630,10 +1630,22 @@ Users should just always call C. int Perl_signbit(NV x) { # ifdef Perl_fp_class_nzero - if (x == 0) - return Perl_fp_class_nzero(x); -# endif + return Perl_fp_class_nzero(x); + /* Try finding the high byte, and assume it's highest bit + * is the sign. This assumption is probably wrong somewhere. */ +# elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN + return (((unsigned char *)&x)[9] & 0x80); +# elif defined(NV_LITTLE_ENDIAN) + /* Note that NVSIZE is sizeof(NV), which would make the below be + * wrong if the end bytes are unused, which happens with the x86 + * 80-bit long doubles, which is why take care of that above. */ + return (((unsigned char *)&x)[NVSIZE - 1] & 0x80); +# elif defined(NV_BIG_ENDIAN) + return (((unsigned char *)&x)[0] & 0x80); +# else + /* This last resort fallback is wrong for the negative zero. */ return (x < 0.0) ? 1 : 0; +# endif } #endif diff --git a/op.c b/op.c index a9cfe72..0de303c 100644 --- a/op.c +++ b/op.c @@ -719,7 +719,7 @@ Perl_op_free(pTHX_ OP *o) * * we've errored, as op flags are often left in an * inconsistent state then. Note that an error when * compiling the main program leaves PL_parser NULL, so - * we can't spot faults in the main code, onoly + * we can't spot faults in the main code, only * evaled/required code */ #ifdef DEBUGGING if ( o->op_ppaddr == PL_ppaddr[o->op_type] @@ -1194,6 +1194,7 @@ Perl_op_null(pTHX_ OP *o) void Perl_op_refcnt_lock(pTHX) + PERL_TSA_ACQUIRE(PL_op_mutex) { #ifdef USE_ITHREADS dVAR; @@ -1204,6 +1205,7 @@ Perl_op_refcnt_lock(pTHX) void Perl_op_refcnt_unlock(pTHX) + PERL_TSA_RELEASE(PL_op_mutex) { #ifdef USE_ITHREADS dVAR; @@ -4154,7 +4156,8 @@ Perl_localize(pTHX_ OP *o, I32 lex) s++; while (1) { - if (*s && strchr("@$%*", *s) && *++s + if (*s && (strchr("@$%", *s) || (!lex && *s == '*')) + && *++s && (isWORDCHAR(*s) || UTF8_IS_CONTINUED(*s))) { s++; sigil = TRUE; @@ -8395,7 +8398,8 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, block->op_next = 0; if (ps && !*ps && !attrs && !CvLVALUE(PL_compcv)) const_sv = - S_op_const_sv(aTHX_ start, PL_compcv, CvCLONE(PL_compcv)); + S_op_const_sv(aTHX_ start, PL_compcv, + cBOOL(CvCLONE(PL_compcv))); else const_sv = NULL; } @@ -11155,11 +11159,20 @@ OP * Perl_ck_entersub_args_list(pTHX_ OP *entersubop) { OP *aop; + PERL_ARGS_ASSERT_CK_ENTERSUB_ARGS_LIST; + aop = cUNOPx(entersubop)->op_first; if (!OpHAS_SIBLING(aop)) aop = cUNOPx(aop)->op_first; for (aop = OpSIBLING(aop); OpHAS_SIBLING(aop); aop = OpSIBLING(aop)) { + /* skip the extra attributes->import() call implicitly added in + * something like foo(my $x : bar) + */ + if ( aop->op_type == OP_ENTERSUB + && (aop->op_flags & OPf_WANT) == OPf_WANT_VOID + ) + continue; list(aop); op_lvalue(aop, OP_ENTERSUB); } @@ -13114,6 +13127,11 @@ Perl_rpeep(pTHX_ OP *o) } redo: + + /* oldoldop -> oldop -> o should be a chain of 3 adjacent ops */ + assert(!oldoldop || oldoldop->op_next == oldop); + assert(!oldop || oldop->op_next == o); + /* By default, this op has now been optimised. A couple of cases below clear this again. */ o->op_opt = 1; @@ -13435,9 +13453,10 @@ Perl_rpeep(pTHX_ OP *o) op_null(o); if (oldop) oldop->op_next = nextop; + o = nextop; /* Skip (old)oldop assignment since the current oldop's op_next already points to the next op. */ - continue; + goto redo; } } break; @@ -13649,9 +13668,17 @@ Perl_rpeep(pTHX_ OP *o) break; /* there's a biggest base we can fit into a - * SAVEt_CLEARPADRANGE in pp_padrange */ - if (intro && base > - (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT))) + * SAVEt_CLEARPADRANGE in pp_padrange. + * (The sizeof() stuff will be constant-folded, and is + * intended to avoid getting "comparison is always false" + * compiler warnings) + */ + if ( intro + && (8*sizeof(base) > + 8*sizeof(UV)-OPpPADRANGE_COUNTSHIFT-SAVE_TIGHT_SHIFT + ? base : 0) > + (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT)) + ) break; /* Success! We've got another valid pad op to optimise away */ @@ -13834,7 +13861,8 @@ Perl_rpeep(pTHX_ OP *o) oldoldop = NULL; goto redo; } - o = oldop; + o = oldop->op_next; + goto redo; } else if (o->op_next->op_type == OP_RV2SV) { if (!(o->op_next->op_private & OPpDEREF)) { @@ -14131,6 +14159,11 @@ Perl_rpeep(pTHX_ OP *o) op_null(o); enter->op_private |= OPpITER_REVERSED; iter->op_private |= OPpITER_REVERSED; + + oldoldop = NULL; + oldop = ourlast; + o = oldop->op_next; + goto redo; break; } diff --git a/op.h b/op.h index ff0713b..13afa8a 100644 --- a/op.h +++ b/op.h @@ -1075,6 +1075,12 @@ C is non-null. For a higher-level interface, see C>. #define MDEREF_MASK 0x7F #define MDEREF_SHIFT 7 +#if defined(PERL_IN_DOOP_C) || defined(PERL_IN_PP_C) +static const char * const deprecated_above_ff_msg + = "Use of strings with code points over 0xFF as arguments to " + "%s operator is deprecated"; +#endif + /* * ex: set ts=8 sts=4 sw=4 et: diff --git a/patchlevel.h b/patchlevel.h index dd28c67..87ceb95 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -15,7 +15,7 @@ #define PERL_REVISION 5 /* age */ #define PERL_VERSION 23 /* epoch */ -#define PERL_SUBVERSION 5 /* generation */ +#define PERL_SUBVERSION 6 /* generation */ /* The following numbers describe the earliest compatible version of Perl ("compatibility" here being defined as sufficient binary/API @@ -36,7 +36,7 @@ */ #define PERL_API_REVISION 5 #define PERL_API_VERSION 23 -#define PERL_API_SUBVERSION 5 +#define PERL_API_SUBVERSION 6 /* XXX Note: The selection of non-default Configure options, such as -Duselonglong may invalidate these settings. Currently, Configure diff --git a/perl.h b/perl.h index c11548d..960a8a5 100644 --- a/perl.h +++ b/perl.h @@ -421,6 +421,16 @@ # define GCC_DIAG_IGNORE(w) # define GCC_DIAG_RESTORE #endif +/* for clang specific pragmas */ +#if defined(__clang__) || defined(__clang) +# define CLANG_DIAG_PRAGMA(x) _Pragma (#x) +# define CLANG_DIAG_IGNORE(x) _Pragma("clang diagnostic push") \ + CLANG_DIAG_PRAGMA(clang diagnostic ignored #x) +# define CLANG_DIAG_RESTORE _Pragma("clang diagnostic pop") +#else +# define CLANG_DIAG_IGNORE(w) +# define CLANG_DIAG_RESTORE +#endif #define NOOP /*EMPTY*/(void)0 /* cea2e8a9dd23747f accidentally lost the comment originally from the first @@ -3052,6 +3062,103 @@ freeing any remaining Perl interpreters. * May make sense to have threads after "*ish.h" anyway */ +/* clang Thread Safety Analysis/Annotations/Attributes + * http://clang.llvm.org/docs/ThreadSafetyAnalysis.html + * + * Available since clang 3.6-ish (appeared in 3.4, but shaky still in 3.5). + * Apple XCode hijacks __clang_major__ and __clang_minor__ + * (6.1 means really clang 3.6), so needs extra hijinks + * (could probably also test the contents of __apple_build_version__). + */ +#if defined(USE_ITHREADS) && defined(I_PTHREAD) && \ + defined(__clang__) && \ + !defined(SWIG) && \ + ((!defined(__apple_build_version__) && \ + ((__clang_major__ == 3 && __clang_minor__ >= 6) || \ + (__clang_major__ >= 4))) || \ + (defined(__apple_build_version__) && \ + ((__clang_major__ == 6 && __clang_minor__ >= 1) || \ + (__clang_major__ >= 7)))) +# define PERL_TSA__(x) __attribute__((x)) +# define PERL_TSA_ACTIVE +#else +# define PERL_TSA__(x) /* No TSA, make TSA attributes no-ops. */ +# undef PERL_TSA_ACTIVE +#endif + +/* PERL_TSA_CAPABILITY() is used to annotate typedefs. + * typedef old_type PERL_TSA_CAPABILITY("mutex") new_type; + */ +#define PERL_TSA_CAPABILITY(x) \ + PERL_TSA__(capability(x)) + +/* In the below examples the mutex must be lexically visible, usually + * either as global variables, or as function arguments. */ + +/* PERL_TSA_GUARDED_BY() is used to annotate global variables. + * + * Foo foo PERL_TSA_GUARDED_BY(mutex); + */ +#define PERL_TSA_GUARDED_BY(x) \ + PERL_TSA__(guarded_by(x)) + +/* PERL_TSA_PT_GUARDED_BY() is used to annotate global pointers. + * The data _behind_ the pointer is guarded. + * + * Foo* ptr PERL_TSA_PT_GUARDED_BY(mutex); + */ +#define PERL_TSA_PT_GUARDED_BY(x) \ + PERL_TSA__(pt_guarded_by(x)) + +/* PERL_TSA_REQUIRES() is used to annotate functions. + * The caller MUST hold the resource when calling the function. + * + * void Foo() PERL_TSA_REQUIRES(mutex); + */ +#define PERL_TSA_REQUIRES(x) \ + PERL_TSA__(requires_capability(x)) + +/* PERL_TSA_EXCLUDES() is used to annotate functions. + * The caller MUST NOT hold resource when calling the function. + * + * EXCLUDES should be used when the function first acquires + * the resource and then releases it. Use to avoid deadlock. + * + * void Foo() PERL_TSA_EXCLUDES(mutex); + */ +#define PERL_TSA_EXCLUDES(x) \ + PERL_TSA__(locks_excluded(x)) + +/* PERL_TSA_ACQUIRE() is used to annotate functions. + * The caller MUST NOT hold the resource when calling the function, + * and the function will acquire the resource. + * + * void Foo() PERL_TSA_ACQUIRE(mutex); + */ +#define PERL_TSA_ACQUIRE(x) \ + PERL_TSA__(acquire_capability(x)) + +/* PERL_TSA_RELEASE() is used to annotate functions. + * The caller MUST hold the resource when calling the function, + * and the function will release the resource. + * + * void Foo() PERL_TSA_RELEASE(mutex); + */ +#define PERL_TSA_RELEASE(x) \ + PERL_TSA__(release_capability(x)) + +/* PERL_TSA_NO_TSA is used to annotate functions. + * Used when being intentionally unsafe, or when the code is too + * complicated for the analysis. Use sparingly. + * + * void Foo() PERL_TSA_NO_TSA; + */ +#define PERL_TSA_NO_TSA \ + PERL_TSA__(no_thread_safety_analysis) + +/* There are more annotations/attributes available, see the clang + * documentation for details. */ + #if defined(USE_ITHREADS) # ifdef NETWARE # include @@ -3073,7 +3180,7 @@ typedef void * perl_key; # include # endif typedef pthread_t perl_os_thread; -typedef pthread_mutex_t perl_mutex; +typedef pthread_mutex_t PERL_TSA_CAPABILITY("mutex") perl_mutex; typedef pthread_cond_t perl_cond; typedef pthread_key_t perl_key; # endif /* I_MACH_CTHREADS */ @@ -3082,6 +3189,25 @@ typedef pthread_key_t perl_key; # endif /* NETWARE */ #endif /* USE_ITHREADS */ +#ifdef PERL_TSA_ACTIVE +/* Since most pthread mutex interfaces have not been annotated, we + * need to have these wrappers. The NO_TSA annotation is quite ugly + * but it cannot be avoided in plain C, unlike in C++, where one could + * e.g. use ACQUIRE() with no arg on a mutex lock method. + * + * The bodies of these wrappers are in util.c + * + * TODO: however, some platforms are starting to get these clang + * thread safety annotations for pthreads, for example FreeBSD. + * Do we need a way to a bypass these wrappers? */ +int perl_tsa_mutex_lock(perl_mutex* mutex) + PERL_TSA_ACQUIRE(*mutex) + PERL_TSA_NO_TSA; +int perl_tsa_mutex_unlock(perl_mutex* mutex) + PERL_TSA_RELEASE(*mutex) + PERL_TSA_NO_TSA; +#endif + #if defined(WIN32) # include "win32.h" #endif @@ -6632,16 +6758,21 @@ extern void moncontrol(int); # if LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_LITTLE_ENDIAN || \ LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN || \ - LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE # define LONGDOUBLE_LITTLE_ENDIAN # endif # if LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN || \ LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN || \ - LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE # define LONGDOUBLE_BIG_ENDIAN # endif +# if LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE +# define LONGDOUBLE_MIX_ENDIAN +# endif + # if LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN || \ LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN # define LONGDOUBLE_X86_80_BIT @@ -6650,8 +6781,10 @@ extern void moncontrol(int); # endif # endif -# if LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN || \ - LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN +# if LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE # define LONGDOUBLE_DOUBLEDOUBLE # endif @@ -6682,6 +6815,9 @@ extern void moncontrol(int); # ifdef LONGDOUBLE_BIG_ENDIAN # define NV_BIG_ENDIAN # endif +# ifdef LONGDOUBLE_MIX_ENDIAN +# define NV_MIX_ENDIAN +# endif #endif /* NaNs (not-a-numbers) can carry payload bits, in addition to @@ -6797,10 +6933,14 @@ extern void moncontrol(int); # define NV_NAN_QS_BYTE_OFFSET 7 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN # define NV_NAN_QS_BYTE_OFFSET 2 -# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE # define NV_NAN_QS_BYTE_OFFSET 13 -# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE # define NV_NAN_QS_BYTE_OFFSET 1 +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE +# define NV_NAN_QS_BYTE_OFFSET 9 +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE +# define NV_NAN_QS_BYTE_OFFSET 6 # else # error "Unexpected long double format" # endif @@ -6842,8 +6982,10 @@ extern void moncontrol(int); LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN) # define NV_NAN_QS_BIT_SHIFT 6 /* 0x40 */ #elif defined(USE_LONG_DOUBLE) && \ - (LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN || \ - LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN) + (LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE) # define NV_NAN_QS_BIT_SHIFT 3 /* 0x08, but not via NV_NAN_BITS */ #else # define NV_NAN_QS_BIT_SHIFT ((NV_NAN_BITS) % 8) /* usually 3, or 0x08 */ @@ -6884,6 +7026,10 @@ extern void moncontrol(int); * 0xFF means "don't go here".*/ /* Shorthands to avoid typoses. */ +#define NV_NAN_PAYLOAD_MASK_SKIP_EIGHT \ + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 +#define NV_NAN_PAYLOAD_PERM_SKIP_EIGHT \ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff #define NV_NAN_PAYLOAD_PERM_0_TO_7 \ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 #define NV_NAN_PAYLOAD_PERM_7_TO_0 \ @@ -6962,17 +7108,28 @@ extern void moncontrol(int); # else # error "Unexpected x86 80-bit big-endian long double format" # endif -# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN -/* For double-double we assume only the first double is used for NaN. */ +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE +/* For double-double we assume only the first double (in LE or BE terms) + * is used for NaN. */ # define NV_NAN_PAYLOAD_MASK \ - NV_NAN_PAYLOAD_MASK_IEEE_754_64_LE + NV_NAN_PAYLOAD_MASK_SKIP_EIGHT, NV_NAN_PAYLOAD_MASK_IEEE_754_64_LE # define NV_NAN_PAYLOAD_PERM \ - NV_NAN_PAYLOAD_PERM_IEEE_754_64_LE -# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN + NV_NAN_PAYLOAD_PERM_SKIP_EIGHT, NV_NAN_PAYLOAD_PERM_IEEE_754_64_LE +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE # define NV_NAN_PAYLOAD_MASK \ NV_NAN_PAYLOAD_MASK_IEEE_754_64_BE # define NV_NAN_PAYLOAD_PERM \ NV_NAN_PAYLOAD_PERM_IEEE_754_64_BE +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE +# define NV_NAN_PAYLOAD_MASK \ + NV_NAN_PAYLOAD_MASK_IEEE_754_64_LE +# define NV_NAN_PAYLOAD_PERM \ + NV_NAN_PAYLOAD_PERM_IEEE_754_64_LE +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE +# define NV_NAN_PAYLOAD_MASK \ + NV_NAN_PAYLOAD_MASK_SKIP_EIGHT, NV_NAN_PAYLOAD_MASK_IEEE_754_64_BE +# define NV_NAN_PAYLOAD_PERM \ + NV_NAN_PAYLOAD_PERM_SKIP_EIGHT, NV_NAN_PAYLOAD_PERM_IEEE_754_64_BE # else # error "Unexpected long double format" # endif diff --git a/perlio.c b/perlio.c index 8ab47e4..343c62e 100644 --- a/perlio.c +++ b/perlio.c @@ -2204,7 +2204,9 @@ PerlIOBase_dup(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *param, int flags) /* Must be called with PL_perlio_mutex locked. */ static void -S_more_refcounted_fds(pTHX_ const int new_fd) { +S_more_refcounted_fds(pTHX_ const int new_fd) + PERL_TSA_REQUIRES(PL_perlio_mutex) +{ dVAR; const int old_max = PL_perlio_fd_refcnt_size; const int new_max = 16 + (new_fd & ~15); @@ -3237,6 +3239,28 @@ PerlIOStdio_close(pTHX_ PerlIO *f) return 0; if (stdio == stdout || stdio == stderr) return PerlIO_flush(f); + } +#ifdef USE_ITHREADS + MUTEX_LOCK(&PL_perlio_mutex); + /* Right. We need a mutex here because for a brief while we + will have the situation that fd is actually closed. Hence if + a second thread were to get into this block, its dup() would + likely return our fd as its dupfd. (after all, it is closed) + Then if we get to the dup2() first, we blat the fd back + (messing up its temporary as a side effect) only for it to + then close its dupfd (== our fd) in its close(dupfd) */ + + /* There is, of course, a race condition, that any other thread + trying to input/output/whatever on this fd will be stuffed + for the duration of this little manoeuvrer. Perhaps we + should hold an IO mutex for the duration of every IO + operation if we know that invalidate doesn't work on this + platform, but that would suck, and could kill performance. + + Except that correctness trumps speed. + Advice from klortho #11912. */ +#endif + if (invalidate) { /* Tricky - must fclose(stdio) to free memory but not close(fd) Use Sarathy's trick from maint-5.6 to invalidate the fileno slot of the FILE * @@ -3245,30 +3269,9 @@ PerlIOStdio_close(pTHX_ PerlIO *f) SAVE_ERRNO; invalidate = PerlIOStdio_invalidate_fileno(aTHX_ stdio); if (!invalidate) { -#ifdef USE_ITHREADS - MUTEX_LOCK(&PL_perlio_mutex); - /* Right. We need a mutex here because for a brief while we - will have the situation that fd is actually closed. Hence if - a second thread were to get into this block, its dup() would - likely return our fd as its dupfd. (after all, it is closed) - Then if we get to the dup2() first, we blat the fd back - (messing up its temporary as a side effect) only for it to - then close its dupfd (== our fd) in its close(dupfd) */ - - /* There is, of course, a race condition, that any other thread - trying to input/output/whatever on this fd will be stuffed - for the duration of this little manoeuvrer. Perhaps we - should hold an IO mutex for the duration of every IO - operation if we know that invalidate doesn't work on this - platform, but that would suck, and could kill performance. - - Except that correctness trumps speed. - Advice from klortho #11912. */ -#endif dupfd = PerlLIO_dup(fd); #ifdef USE_ITHREADS if (dupfd < 0) { - MUTEX_UNLOCK(&PL_perlio_mutex); /* Oh cXap. This isn't going to go well. Not sure if we can recover from here, or if closing this particular FILE * is a good idea now. */ @@ -3293,10 +3296,10 @@ PerlIOStdio_close(pTHX_ PerlIO *f) if (dupfd >= 0) { PerlLIO_dup2(dupfd,fd); PerlLIO_close(dupfd); + } #ifdef USE_ITHREADS - MUTEX_UNLOCK(&PL_perlio_mutex); + MUTEX_UNLOCK(&PL_perlio_mutex); #endif - } return result; } } diff --git a/plan9/config.plan9 b/plan9/config.plan9 index 722e829..4951e0e 100644 --- a/plan9/config.plan9 +++ b/plan9/config.plan9 @@ -3329,8 +3329,8 @@ * This symbol contains the ~name expanded version of PRIVLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define PRIVLIB "/sys/lib/perl/5.23.5" /**/ -#define PRIVLIB_EXP "/sys/lib/perl/5.23.5" /**/ +#define PRIVLIB "/sys/lib/perl/5.23.6" /**/ +#define PRIVLIB_EXP "/sys/lib/perl/5.23.6" /**/ /* PTRSIZE: * This symbol contains the size of a pointer, so that the C preprocessor @@ -3457,9 +3457,9 @@ * removed. The elements in inc_version_list (inc_version_list.U) can * be tacked onto this variable to generate a list of directories to search. */ -#define SITELIB "/sys/lib/perl/5.23.5/site_perl" /**/ -#define SITELIB_EXP "/sys/lib/perl/5.23.5/site_perl" /**/ -#define SITELIB_STEM "/sys/lib/perl/5.23.5/site_perl" /**/ +#define SITELIB "/sys/lib/perl/5.23.6/site_perl" /**/ +#define SITELIB_EXP "/sys/lib/perl/5.23.6/site_perl" /**/ +#define SITELIB_STEM "/sys/lib/perl/5.23.6/site_perl" /**/ /* Size_t_size: * This symbol holds the size of a Size_t in bytes. diff --git a/plan9/config_sh.sample b/plan9/config_sh.sample index 4dd6efb..ffe4728 100644 --- a/plan9/config_sh.sample +++ b/plan9/config_sh.sample @@ -32,12 +32,12 @@ alignbytes='4' ansi2knr='' aphostname='/bin/uname -n' api_revision='5' -api_subversion='5' +api_subversion='6' api_version='23' -api_versionstring='5.23.5' +api_versionstring='5.23.6' ar='ar' -archlib='/sys/lib/perl5/5.23.5/386' -archlibexp='/sys/lib/perl5/5.23.5/386' +archlib='/sys/lib/perl5/5.23.6/386' +archlibexp='/sys/lib/perl5/5.23.6/386' archname64='' archname='386' archobjs='' @@ -790,17 +790,17 @@ inc_version_list=' ' inc_version_list_init='0' incpath='' inews='' -installarchlib='/sys/lib/perl/5.23.5/386' +installarchlib='/sys/lib/perl/5.23.6/386' installbin='/usr/bin' installman1dir='/sys/man/1pub' installman3dir='/sys/man/2pub' installprefix='/usr' installprefixexp='/usr' -installprivlib='/sys/lib/perl/5.23.5' +installprivlib='/sys/lib/perl/5.23.6' installscript='/usr/bin' -installsitearch='/sys/lib/perl/5.23.5/site_perl/386' +installsitearch='/sys/lib/perl/5.23.6/site_perl/386' installsitebin='/usr/bin' -installsitelib='/sys/lib/perl/5.23.5/site_perl' +installsitelib='/sys/lib/perl/5.23.6/site_perl' installstyle='lib/perl5' installusrbinperl='undef' installvendorarch='' @@ -925,8 +925,8 @@ pmake='' pr='' prefix='/usr' prefixexp='/usr' -privlib='/sys/lib/perl/5.23.5' -privlibexp='/sys/lib/perl/5.23.5' +privlib='/sys/lib/perl/5.23.6' +privlibexp='/sys/lib/perl/5.23.6' procselfexe='' prototype='define' ptrsize='4' @@ -991,13 +991,13 @@ sig_num='0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 sig_num_init='0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0' sig_size='50' signal_t='void' -sitearch='/sys/lib/perl/5.23.5/site_perl/386' +sitearch='/sys/lib/perl/5.23.6/site_perl/386' sitearchexp='/sys/lib/perl/site_perl/386' sitebin='/usr/bin' sitebinexp='/usr/bin' -sitelib='/sys/lib/perl/5.23.5/site_perl' -sitelib_stem='/sys/lib/perl/5.23.5/site_perl' -sitelibexp='/sys/lib/perl/5.23.5/site_perl' +sitelib='/sys/lib/perl/5.23.6/site_perl' +sitelib_stem='/sys/lib/perl/5.23.6/site_perl' +sitelibexp='/sys/lib/perl/5.23.6/site_perl' siteprefix='/usr' siteprefixexp='/usr' sizesize='4' @@ -1030,7 +1030,7 @@ stdio_stream_array='' strerror_r_proto='0' strings='/sys/include/ape/string.h' submit='' -subversion='5' +subversion='6' sysman='/sys/man/1pub' tail='' tar='' @@ -1112,8 +1112,8 @@ vendorlib_stem='' vendorlibexp='' vendorprefix='' vendorprefixexp='' -version='5.23.5' -version_patchlevel_string='version 23 subversion 5' +version='5.23.6' +version_patchlevel_string='version 23 subversion 6' versiononly='undef' vi='' xlibpth='' @@ -1127,9 +1127,9 @@ config_args='' config_argc=0 PERL_REVISION=5 PERL_VERSION=23 -PERL_SUBVERSION=5 +PERL_SUBVERSION=6 PERL_API_REVISION=5 PERL_API_VERSION=23 -PERL_API_SUBVERSION=5 +PERL_API_SUBVERSION=6 PERL_PATCHLEVEL= PERL_CONFIG_SH=true diff --git a/pod/.gitignore b/pod/.gitignore index 4f6d9ce..4dba2d2 100644 --- a/pod/.gitignore +++ b/pod/.gitignore @@ -53,7 +53,7 @@ /roffitall # generated -/perl5235delta.pod +/perl5236delta.pod /perlapi.pod /perlintern.pod *.html diff --git a/pod/perl.pod b/pod/perl.pod index 7dc9662..a15c0ef 100644 --- a/pod/perl.pod +++ b/pod/perl.pod @@ -180,11 +180,13 @@ aux c2ph h2ph h2xs perlbug pl2pm pod2html pod2man splain xsubpp perlhist Perl history records perldelta Perl changes since previous version + perl5235delta Perl changes in version 5.23.5 perl5234delta Perl changes in version 5.23.4 perl5233delta Perl changes in version 5.23.3 perl5232delta Perl changes in version 5.23.2 perl5231delta Perl changes in version 5.23.1 perl5230delta Perl changes in version 5.23.0 + perl5221delta Perl changes in version 5.22.1 perl5220delta Perl changes in version 5.22.0 perl5203delta Perl changes in version 5.20.3 perl5202delta Perl changes in version 5.20.2 diff --git a/pod/perl5221delta.pod b/pod/perl5221delta.pod new file mode 100644 index 0000000..5d01a4b --- /dev/null +++ b/pod/perl5221delta.pod @@ -0,0 +1,338 @@ +=encoding utf8 + +=head1 NAME + +perl5221delta - what is new for perl v5.22.1 + +=head1 DESCRIPTION + +This document describes differences between the 5.22.0 release and the 5.22.1 +release. + +If you are upgrading from an earlier release such as 5.20.0, first read +L, which describes differences between 5.20.0 and 5.22.0. + +=head1 Incompatible Changes + +There are no changes intentionally incompatible with 5.20.0 other than the +following single exception, which we deemed to be a sensible change to make in +order to get the new C<\b{wb}> and (in particular) C<\b{sb}> features sane +before people decided they're worthless because of bugs in their Perl 5.22.0 +implementation and avoided them in the future. +If any others exist, they are bugs, and we request that you submit a report. +See L below. + +=head2 Bounds Checking Constructs + +Several bugs, including a segmentation fault, have been fixed with the bounds +checking constructs (introduced in Perl 5.22) C<\b{gcb}>, C<\b{sb}>, C<\b{wb}>, +C<\B{gcb}>, C<\B{sb}>, and C<\B{wb}>. All the C<\B{}> ones now match an empty +string; none of the C<\b{}> ones do. +L<[perl #126319]|https://rt.perl.org/Ticket/Display.html?id=126319> + +=head1 Modules and Pragmata + +=head2 Updated Modules and Pragmata + +=over 4 + +=item * + +L has been upgraded from version 5.20150520 to 5.20151213. + +=item * + +L has been upgraded from version 0.22 to 0.23. + +=item * + +L has been upgraded from version 1.53 to 1.53_01. + +If C was passed C<$!> as its argument then it accidentally +cleared C<$!>. This has been fixed. +L<[perl #126229]|https://rt.perl.org/Ticket/Display.html?id=126229> + +=item * + +L has been upgraded from version 2.53 to 2.53_01. + +=item * + +L has been upgraded from version 1.32 to 1.34. + +The C example now actually uses C. +L<[perl #126051]|https://rt.perl.org/Ticket/Display.html?id=126051> + +=item * + +L has been upgraded from version 0.51 to 0.52. + +This has been updated for Windows 8.1, 10 and 2012 R2 Server. + +=back + +=head1 Documentation + +=head2 Changes to Existing Documentation + +=head3 L + +=over 4 + +=item * + +The usage of C and C has been clarified. + +=back + +=head3 L + +=over 4 + +=item * + +The specific true value of C<$!{E...}> is now documented, noting that it is +subject to change and not guaranteed. + +=back + +=head1 Diagnostics + +The following additions or changes have been made to diagnostic output, +including warnings and fatal error messages. For the complete list of +diagnostic messages, see L. + +=head2 Changes to Existing Diagnostics + +=over 4 + +=item * + +The C and C builtins are now more careful about the warnings +they emit: argument reordering now disables the "redundant argument" warning in +all cases. +L<[perl #125469]|https://rt.perl.org/Ticket/Display.html?id=125469> + +=back + +=head1 Configuration and Compilation + +=over 4 + +=item * + +Using the C define in combination with the default hash algorithm +C resulted in a fatal error while compiling +the interpreter, since Perl 5.17.10. This has been fixed. + +=item * + +Configuring with ccflags containing quotes (e.g. +C<< -Accflags='-DAPPLLIB_EXP=\"/usr/libperl\"' >>) was broken in Perl 5.22.0 +but has now been fixed again. +L<[perl #125314]|https://rt.perl.org/Ticket/Display.html?id=125314> + +=back + +=head1 Platform Support + +=head2 Platform-Specific Notes + +=over 4 + +=item IRIX + +=over + +=item * + +Under some circumstances IRIX stdio fgetc() and fread() set the errno to +C, which made no sense according to either IRIX or POSIX docs. Errno +is now cleared in such cases. +L<[perl #123977]|https://rt.perl.org/Ticket/Display.html?id=123977> + +=item * + +Problems when multiplying long doubles by infinity have been fixed. +L<[perl #126396]|https://rt.perl.org/Ticket/Display.html?id=126396> + +=item * + +All tests pass now on IRIX with the default build configuration. + +=back + +=back + +=head1 Selected Bug Fixes + +=over 4 + +=item * + +C no longer segfaults, giving a syntax error message instead. +L<[perl #125805]|https://rt.perl.org/Ticket/Display.html?id=125805> + +=item * + +Regular expression possessive quantifier Perl 5.20 regression now fixed. +CIC<{>I,IC<}+>C is supposed to behave identically to +C>IC<{>I,IC<})/>. Since Perl 5.20, this didn't work +if I and I were equal. +L<[perl #125825]|https://rt.perl.org/Ticket/Display.html?id=125825> + +=item * + +Certain syntax errors in +L caused panics instead +of the proper error message. This has now been fixed. +L<[perl #126481]|https://rt.perl.org/Ticket/Display.html?id=126481> + +=item * + +C<< BEGIN <> >> no longer segfaults and properly produces an error message. +L<[perl #125341]|https://rt.perl.org/Ticket/Display.html?id=125341> + +=item * + +A regression from Perl 5.20 has been fixed, in which some syntax errors in +L|perlrecharclass/Extended Bracketed Character Classes> constructs +within regular expression patterns could cause a segfault instead of a proper +error message. +L<[perl #126180]|https://rt.perl.org/Ticket/Display.html?id=126180> + +=item * + +Another problem with +L|perlrecharclass/Extended Bracketed Character Classes> +constructs has been fixed wherein things like C<\c]> could cause panics. +L<[perl #126181]|https://rt.perl.org/Ticket/Display.html?id=126181> + +=item * + +In Perl 5.22.0, the logic changed when parsing a numeric parameter to the -C +option, such that the successfully parsed number was not saved as the option +value if it parsed to the end of the argument. +L<[perl #125381]|https://rt.perl.org/Ticket/Display.html?id=125381> + +=item * + +Warning fatality is now ignored when rewinding the stack. This prevents +infinite recursion when the now fatal error also causes rewinding of the stack. +L<[perl #123398]|https://rt.perl.org/Ticket/Display.html?id=123398> + +=item * + +A crash with C<< %::=(); J->${\"::"} >> has been fixed. +L<[perl #125541]|https://rt.perl.org/Ticket/Display.html?id=125541> + +=item * + +Nested quantifiers such as C should cause perl to throw a fatal +error, but were being silently accepted since Perl 5.20.0. This has been +fixed. +L<[perl #126253]|https://rt.perl.org/Ticket/Display.html?id=126253> + +=item * + +Regular expression sequences such as C (and similarly with other +recognized flags or combination of flags) should cause perl to throw a fatal +error, but were being silently accepted since Perl 5.18.0. This has been +fixed. +L<[perl #126178]|https://rt.perl.org/Ticket/Display.html?id=126178> + +=item * + +A bug in hexadecimal floating point literal support meant that high-order bits +could be lost in cases where mantissa overflow was caused by too many trailing +zeros in the fractional part. This has been fixed. +L<[perl #126582]|https://rt.perl.org/Ticket/Display.html?id=126582> + +=item * + +Another hexadecimal floating point bug, causing low-order bits to be lost in +cases where the last hexadecimal digit of the mantissa has bits straddling the +limit of the number of bits allowed for the mantissa, has also been fixed. +L<[perl #126586]|https://rt.perl.org/Ticket/Display.html?id=126586> + +=item * + +Further hexadecimal floating point bugs have been fixed: In some circumstances, +the C<%a> format specifier could variously lose the sign of the negative zero, +fail to display zeros after the radix point with the requested precision, or +even lose the radix point after the leftmost hexadecimal digit completely. + +=item * + +A crash caused by incomplete expressions within C<< /(?[ ])/ >> (e.g. +C<< /(?[[0]+()+])/ >>) has been fixed. +L<[perl #126615]|https://rt.perl.org/Ticket/Display.html?id=126615> + +=back + +=head1 Acknowledgements + +Perl 5.22.1 represents approximately 6 months of development since Perl 5.22.0 +and contains approximately 19,000 lines of changes across 130 files from 27 +authors. + +Excluding auto-generated files, documentation and release tools, there were +approximately 1,700 lines of changes to 44 .pm, .t, .c and .h files. + +Perl continues to flourish into its third decade thanks to a vibrant community +of users and developers. The following people are known to have contributed +the improvements that became Perl 5.22.1: + +Aaron Crane, Abigail, Andy Broad, Aristotle Pagaltzis, Chase Whitener, Chris +'BinGOs' Williams, Craig A. Berry, Daniel Dragan, David Mitchell, Father +Chrysostomos, Herbert Breunung, Hugo van der Sanden, James E Keenan, Jan +Dubois, Jarkko Hietaniemi, Karen Etheridge, Karl Williamson, Lukas Mai, Matthew +Horsfall, Peter Martini, Rafael Garcia-Suarez, Ricardo Signes, Shlomi Fish, +Sisyphus, Steve Hay, Tony Cook, Victor Adam. + +The list above is almost certainly incomplete as it is automatically generated +from version control history. In particular, it does not include the names of +the (very much appreciated) contributors who reported issues to the Perl bug +tracker. + +Many of the changes included in this version originated in the CPAN modules +included in Perl's core. We're grateful to the entire CPAN community for +helping Perl to flourish. + +For a more complete list of all of Perl's historical contributors, please see +the F file in the Perl source distribution. + +=head1 Reporting Bugs + +If you find what you think is a bug, you might check the articles recently +posted to the comp.lang.perl.misc newsgroup and the perl bug database at +https://rt.perl.org/ . There may also be information at +http://www.perl.org/ , the Perl Home Page. + +If you believe you have an unreported bug, please run the L program +included with your release. Be sure to trim your bug down to a tiny but +sufficient test case. Your bug report, along with the output of C, +will be sent off to perlbug@perl.org to be analysed by the Perl porting team. + +If the bug you are reporting has security implications, which make it +inappropriate to send to a publicly archived mailing list, then please send it +to perl5-security-report@perl.org. This points to a closed subscription +unarchived mailing list, which includes all the core committers, who will be +able to help assess the impact of issues, figure out a resolution, and help +co-ordinate the release of patches to mitigate or fix the problem across all +platforms on which Perl is supported. Please only use this address for +security issues in the Perl core, not for modules independently distributed on +CPAN. + +=head1 SEE ALSO + +The F file for an explanation of how to view exhaustive details on +what changed. + +The F file for how to build Perl. + +The F file for general stuff. + +The F and F files for copyright information. + +=cut diff --git a/pod/perl5235delta.pod b/pod/perl5235delta.pod new file mode 100644 index 0000000..c9a3e55 --- /dev/null +++ b/pod/perl5235delta.pod @@ -0,0 +1,420 @@ +=encoding utf8 + +=head1 NAME + +perl5235delta - what is new for perl v5.23.5 + +=head1 DESCRIPTION + +This document describes differences between the 5.23.4 release and the 5.23.5 +release. + +If you are upgrading from an earlier release such as 5.23.3, first read +L, which describes differences between 5.23.3 and 5.23.4. + +=head1 Performance Enhancements + +=over 4 + +=item * + +Faster addition, subtraction and multiplication. + +Since 5.8.0, arithmetic became slower due to the need to support +64-bit integers. To deal with 64-bit integers, a lot more corner +cases need to be checked, which adds time. We now detect common +cases where there is no need to check for those corner cases, +and special-case them. + +=item * + +Faster preincrement, predecrement, postincrement, postdecrement. + +By internally splitting the functions which handled multiple +cases into different functions. + +=back + +=head1 Modules and Pragmata + +=head2 Updated Modules and Pragmata + +=over 4 + +=item * + +L has been upgraded from version 0.10 to 0.11. + +=item * + +L has been upgraded from version 1.60 to 1.61. + +=item * + +L has been upgraded from version 2.22 to 2.22_01. + +Better handling of attempts to load non-existent modules. +Improvements to fields.pm documentation. L now requires v5.8.0. + +=item * + +L has been upgraded from version 1.37 to 1.38. + +Improvements when working with older perls. + +=item * + +L has been upgraded from version 0.24 to 0.25. + +=item * + +L has been upgraded from version 1.22 to 1.23. + +=item * + +L has been upgraded from version 1.17 to 1.18. + +=item * + +L has been upgraded from version 1.35 to 1.36. + +=item * + +L has been upgraded from version 2.17 to 2.22_01. + +=item * + +L has been upgraded from version 1.31 to 1.32. + +Handles empty directory lists. + +=item * + +L has been upgraded from version 3.58 to 3.59. + +=item * + +L has been upgraded from version 2.47 to 2.48. + +Fixes an issue with C<< gnu_compat >> + +=item * + +L has been upgraded from version 1.17 to 1.18. + +=item * + +L has been upgraded from version 1.19 to 1.20. + +Include the error message on C<< exec() >> failure. + +=item * + +L has been upgraded from version 1.999704 to 1.999710. + +=item * + +L has been upgraded from version 0.34 to 0.37. + +=item * + +L has been upgraded from version 5.20151020 to 5.20151120. + +=item * + +L has been upgraded from version 1.000029 to 1.000030. + +Temp dirs cleaned up during tests. More accurately mark tests as TODO, so as to have a quieter and less confusing test run without passing TODO tests. + +=item * + +L has been upgraded from version 0.22 to 0.23. + +=item * + +L has been upgraded from version 0.014 to 0.015. + +=item * + +L has been upgraded from version 0.23 to 0.24. + +=item * + +L has been upgraded from version 0.15 to 0.16. + +=item * + +L has been upgraded from version 3.30 to 3.32. + +Switched debugging output from C<< STDOUT >> to C<< STDERR >>. + +Added C<< errata_seen() >> to make POD errors easily accessible. + +Simplified the detection of case-insensitivity in Pod::Simple::Search. + +Fixed C<< Use of uninitialized value $1 in lc >> warning in +Pod::Simple::Search. + +If C<< @INC >> includes the current directory symbol, C<.>, the +C<< survey() >> method of C<< Pod::Simple::Search >> no longer excludes +it from its list of directories to search. Instead, The C<< survey() >> and +C<< find() >> methods now both exclude duplicate directories from C<< @INC >> +(RT #102344). + +Moved source repository and updated links to new perl-pod GitHub +organization: L. + +Improved repository links and added GitHub issue tracking link to +the distribution metadata. + +Switched from C<< File::Spec >>'s C<< catdir >> to C<< catfile >> +for path names, to fix failures on VMS. Also now use Unix path semantics +where they're not required to be platform-specific. (RT #105511). + +Improved the example use of the C<< html_encode_chars() >> method in +the C<< Pod::Simple::XHTML >> documentation. + +=item * + +L has been upgraded from version 1.58 to 1.59. + +=item * + +L has been upgraded from version 3.06 to 3.07. + +=item * + +L has been upgraded from version 2.03 to 2.04. + +=item * + +L has been upgraded from version 1.48 to 1.49. + +=item * + +L has been upgraded from version 1.03 to 1.04. + +=item * + +L has been upgraded from version 1.9727_02 to 1.9728. + +=item * + +L has been upgraded from version 1.30 to 1.31. + +=item * + +L has been upgraded from version 1.21 to 1.23. + +=item * + +L has been upgraded from version 0.20 to 0.21. + +=back + +=head1 Diagnostics + +The following additions or changes have been made to diagnostic output, +including warnings and fatal error messages. For the complete list of +diagnostic messages, see L. + +=head2 New Diagnostics + +=head3 New Errors + +=over 4 + +=item * + +L<<< Sequence (?... not terminated in regex; marked by S<<-- HERE> in mE%sE|perldiag/"Sequence (?... not terminated in regex; marked by <-- HERE in mE%sE" >>> + +=back + +=head2 Changes to Existing Diagnostics + +=over 4 + +=item * + +When running out of memory during an attempt the increase the stack +size, previously, perl would die using the cryptic message +C<< panic: av_extend_guts() negative count (-9223372036854775681) >>. +This has been fixed to show the prettier message: +L<< Out of memory during stack extend|perldiag/"Out of memory during %s extend" >> + +=back + +=head1 Configuration and Compilation + +=over 4 + +=item * + +C now acts as if the C<-O> option is always passed, allowing command +line options to override saved configuration. This should eliminate confusion +when command line options are ignored for no obvious reason. C<-O> is now +permitted, but ignored. + +=item * + +Some filesystem stat symbols which were not used by the Perl core +were removed in an earlier commit. However, since these symbols +turned out to be used by at least one CPAN module, these symbols +have been restored. + +=item * + +C<< PPPort.so/PPPort.dll >> no longer get installed, as they are +not used by C<< PPPort.pm >>, only by its test files. + +=item * + +It is now possible to specify which compilation date to show on +C<< perl -V >> output, by setting the macro C<< PERL_BUILD_DATE >>. + +=back + +=head1 Platform Support + +=head2 Platform-Specific Notes + +=over 4 + +=item Win32 + +Win32 does now a parallel build with C++. + +=item Tru64 + +Workaround where Tru64 balks when prototypes are listed as +C<< PERL_STATIC_INLINE >>, but where the test is build with +C<< -DPERL_NO_INLINE_FUNCTIONS >>. + +=back + +=head1 Internal Changes + +=over 4 + +=item * + +C<< sv_ref() >> is now part of the API. + +=back + +=head1 Selected Bug Fixes + +=over 4 + +=item * + +C<\b{sb}> works much better. In Perl v5.22.0, this new construct didn't +seem to give the expected results, yet passed all the tests in the +extensive suite furnished by Unicode. It turns out that it was because +these were short input strings, and the failures had to do with longer +inputs. This was fixed in Perl 5.23.4, but the improvement was not +noticed until after that was released, so is included here now. + +=item * + +Certain syntax errors in +L caused panics +instead of the proper error message. This has now been fixed. [perl +#126481] + +=item * + +An earlier commit added a message when a quantifier in a regular +expression was useless, but then caused the parser to skip it; +this caused the surplus quantifier to be silently ignored, instead +of throwing an error. This is now fixed. [perl #126253] + +=item * + +The switch to building non-XS modules last in win32/makefile.mk (introduced +by design as part of the changes to enable parallel building) caused the +build of POSIX to break due to problems with the version module. This +is now fixed. + +=item * + +Improved parsing of hex float constants. + +=item * + +Fixed an issue with C<< pack >> where C<< pack "H" >> (and C<< pack "h" >>) +could read past the source when given a non-utf8 source, and a utf8 target. +[perl #126325] + +=item * + +Fixed several cases where perl would abort due to a segmentation fault, +or a C-level assert. [perl #126615], [perl #126602], [perl #126193]. + +=back + +=head1 Acknowledgements + +Perl 5.23.5 represents approximately 4 weeks of development since Perl 5.23.4 +and contains approximately 12,000 lines of changes across 290 files from 23 +authors. + +Excluding auto-generated files, documentation and release tools, there were +approximately 6,400 lines of changes to 180 .pm, .t, .c and .h files. + +Perl continues to flourish into its third decade thanks to a vibrant community +of users and developers. The following people are known to have contributed the +improvements that became Perl 5.23.5: + +Aaron Crane, Abigail, Chris 'BinGOs' Williams, Craig A. Berry, Dagfinn Ilmari +Mannsåker, Daniel Dragan, David Mitchell, Dr.Ruud, H.Merijn Brand, Ivan +Pozdeev, James E Keenan, Jarkko Hietaniemi, Jerry D. Hedden, Karen Etheridge, +Karl Williamson, Lukas Mai, Mohammed El-Afifi, Niko Tyni, Peter Rabbitson, +Reini Urban, Ricardo Signes, Steve Hay, Tony Cook. + +The list above is almost certainly incomplete as it is automatically generated +from version control history. In particular, it does not include the names of +the (very much appreciated) contributors who reported issues to the Perl bug +tracker. + +Many of the changes included in this version originated in the CPAN modules +included in Perl's core. We're grateful to the entire CPAN community for +helping Perl to flourish. + +For a more complete list of all of Perl's historical contributors, please see +the F file in the Perl source distribution. + +=head1 Reporting Bugs + +If you find what you think is a bug, you might check the articles recently +posted to the comp.lang.perl.misc newsgroup and the perl bug database at +L . There may also be information at +L , the Perl Home Page. + +If you believe you have an unreported bug, please run the L program +included with your release. Be sure to trim your bug down to a tiny but +sufficient test case. Your bug report, along with the output of C, +will be sent off to perlbug@perl.org to be analysed by the Perl porting team. + +If the bug you are reporting has security implications, which make it +inappropriate to send to a publicly archived mailing list, then please send it +to perl5-security-report@perl.org. This points to a closed subscription +unarchived mailing list, which includes all the core committers, who will be +able to help assess the impact of issues, figure out a resolution, and help +co-ordinate the release of patches to mitigate or fix the problem across all +platforms on which Perl is supported. Please only use this address for +security issues in the Perl core, not for modules independently distributed on +CPAN. + +=head1 SEE ALSO + +The F file for an explanation of how to view exhaustive details on +what changed. + +The F file for how to build Perl. + +The F file for general stuff. + +The F and F files for copyright information. + +=cut diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 80ee973..2438a0d 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -2,36 +2,71 @@ =head1 NAME -perldelta - what is new for perl v5.23.5 +perldelta - what is new for perl v5.23.6 =head1 DESCRIPTION -This document describes differences between the 5.23.4 release and the 5.23.5 +This document describes differences between the 5.23.5 release and the 5.23.6 release. -If you are upgrading from an earlier release such as 5.23.3, first read -L, which describes differences between 5.23.3 and 5.23.4. +If you are upgrading from an earlier release such as 5.23.4, first read +L, which describes differences between 5.23.4 and 5.23.5. -=head1 Performance Enhancements +=head1 Incompatible Changes -=over 4 +=head2 Regular expression compilation errors -=item * +Some regular expression patterns that had runtime errors now +don't compile at all. -Faster addition, subtraction and multiplication. +This should have been in the perldelta for 5.23.4, but was omitted. -Since 5.8.0, arithmetic became slower due to the need to support -64-bit integers. To deal with 64-bit integers, a lot more corner -cases need to be checked, which adds time. We now detect common -cases where there is no need to check for those corner cases, -and special-case them. +Almost all Unicode properties using the C<\p{}> and C<\P{}> regular +expression pattern constructs are now checked for validity at pattern +compilation time, and invalid ones will cause the program to not +compile. In earlier releases, this check was often deferred until run +time. Whenever an error check is moved from run- to compile time, +erroneous code is caught 100% of the time, whereas before it would only +get caught if and when the offending portion actually gets executed, +which for unreachable code might be never. -=item * +=head1 Deprecations + +=head2 Using code points above the platform's C is now +deprecated -Faster preincrement, predecrement, postincrement, postdecrement. +Unicode defines code points in the range C<0..0x10FFFF>. Some standards +at one time defined them up to 2**31 - 1, but Perl has allowed them to +be as high as anything that will fit in a word on the platform being +used. However, use of those above the platform's C is broken in +some constructs, notably C, regular expression patterns involving +quantifiers, and in some arithmetic and comparison operations, such as +being the upper limit of a loop. Now the use of such code points raises +a deprecation warning, unless that warning category is turned off. +C is typically 2**31 -1 on 32-bit platforms, and 2**63-1 on +64-bit ones. -By internally splitting the functions which handled multiple -cases into different functions. +=head2 Doing bitwise operations on strings containing code points above +0xFF is deprecated + +The string bitwise operators treat their operands as strings of bytes, +and values beyond 0xFF are nonsensical in this context. To operate on +encoded bytes, first encode the strings. To operate on code points' +numeric values, use C and C. In the future, this +warning will be replaced by an exception. + +=head1 Performance Enhancements + +=over 4 + +=item * + +Many languages, such as Chinese, are caseless. Perl now knows about +most modern commercially important ones, and skips much of the work when +a program tries to change case in them (like C) or match +caselessly (C). This will speed up a program, such as a web +server, that can operate on multiple languages, while operating on a +caseless one. =back @@ -43,172 +78,193 @@ cases into different functions. =item * -L has been upgraded from version 0.10 to 0.11. +The modules L, L, and the perl debugger could have, under +rare circumstances, output raw control characters. This has been fixed. =item * -L has been upgraded from version 1.60 to 1.61. +L has been upgraded from version 1.35 to 1.36. + +Uses of C<< < > >> are now always deparsed as either C or +C, and uses of C or C are never deparsed as +C<< < > >>. This fixes the deparsing of C. +L<[perl #116677]|https://rt.perl.org/Ticket/Display.html?id=116677> =item * -L has been upgraded from version 2.22 to 2.22_01. +L has been upgraded from version 2.22_01 to 2.23. -Better handling of attempts to load non-existent modules. -Improvements to fields.pm documentation. L now requires v5.8.0. +=item * + +L has been upgraded from version 1.2 to 1.21. =item * -L has been upgraded from version 1.37 to 1.38. +L has been upgraded from version 2.133 to 2.140. -Improvements when working with older perls. +Adds a method for getting structured requirements and adds better error +messages. =item * -L has been upgraded from version 0.24 to 0.25. +L has been upgraded from version 0.017 to 0.018, +with no change since 0.017. =item * -L has been upgraded from version 1.22 to 1.23. +L has been upgraded from version 2.158 to 2.159. -=item * +This adds a "Trailingcomma" option, which when enabled adds a trailing comma +after the last element of dumped arrays and hashes that would otherwise be +followed immediately by a line break. +L<[perl #126813]|https://rt.perl.org/Ticket/Display.html?id=126813> -L has been upgraded from version 1.17 to 1.18. +The internals have also been substantially refactored and cleaned up. It +may be more efficient on some platforms. =item * -L has been upgraded from version 1.35 to 1.36. +L has been upgraded from version 1.36 to 1.37. + +Fixed an else nesting issue in dynamic loading support for OS/390 (and +similar systems) that was introduced in 1.36. =item * -L has been upgraded from version 2.17 to 2.22_01. +L has been upgraded from version 0.11 to 0.12. =item * -L has been upgraded from version 1.31 to 1.32. - -Handles empty directory lists. +L has been upgraded from version 2.22_01 to 2.23. =item * -L has been upgraded from version 3.58 to 3.59. +L and L have been upgraded from version 3.59 to 3.60, +adding L. =item * -L has been upgraded from version 2.47 to 2.48. - -Fixes an issue with C<< gnu_compat >> +L has been upgraded from version 1.18 to 1.19. =item * -L has been upgraded from version 1.17 to 1.18. +L has been upgraded from version 1.07 to 1.08. =item * -L has been upgraded from version 1.19 to 1.20. - -Include the error message on C<< exec() >> failure. +L has been upgraded from version 3.36 to 3.37. =item * -L has been upgraded from version 1.999704 to 1.999710. +L has been updated from version 0.37 to 0.38. =item * -L has been upgraded from version 0.34 to 0.37. +L has been updated from version 5.20151120 to +5.20151220. =item * -L has been upgraded from version 5.20151020 to 5.20151120. +L has been updated from version 1.000030 to +1.000031, with no functional changes. =item * -L has been upgraded from version 1.000029 to 1.000030. +L has been upgraded from version 1.33 to 1.34. -Temp dirs cleaned up during tests. More accurately mark tests as TODO, so as to have a quieter and less confusing test run without passing TODO tests. +=item * + +L has been upgraded from version 0.015 to 0.016. =item * -L has been upgraded from version 0.22 to 0.23. +L has been upgraded from version 3.25_01 to 3.25_02. =item * -L has been upgraded from version 0.014 to 0.015. +L has been upgraded from version 1.13 to 1.14. =item * -L has been upgraded from version 0.23 to 0.24. +L has been upgraded from version 1.09 to 1.10, with only a +documentation change. =item * -L has been upgraded from version 0.15 to 0.16. +L has been upgraded from version 4.03 to 4.04, with no +functional changes. =item * -L has been upgraded from version 3.30 to 3.32. +L has been upgraded from version 1.27 to 1.28. -Switched debugging output from C<< STDOUT >> to C<< STDERR >>. +=item * -Added C<< errata_seen() >> to make POD errors easily accessible. +L has been upgraded from version 2.04 to 2.05. -Simplified the detection of case-insensitivity in Pod::Simple::Search. +=item * -Fixed C<< Use of uninitialized value $1 in lc >> warning in -Pod::Simple::Search. +L has been upgraded from version 1.49 to 1.50. -If C<< @INC >> includes the current directory symbol, C<.>, the -C<< survey() >> method of C<< Pod::Simple::Search >> no longer excludes -it from its list of directories to search. Instead, The C<< survey() >> and -C<< find() >> methods now both exclude duplicate directories from C<< @INC >> -(RT #102344). +=item * -Moved source repository and updated links to new perl-pod GitHub -organization: L. +L has been updated from version 1.23 to 1.24, with +no functional changes. -Improved repository links and added GitHub issue tracking link to -the distribution metadata. +=item * -Switched from C<< File::Spec >>'s C<< catdir >> to C<< catfile >> -for path names, to fix failures on VMS. Also now use Unix path semantics -where they're not required to be platform-specific. (RT #105511). +L has been upgraded from version 1.17 to 1.18. -Improved the example use of the C<< html_encode_chars() >> method in -the C<< Pod::Simple::XHTML >> documentation. +Partly reverted a micro-optimization to F that +caused self-recursion when it was loaded with C<${^ENCODING}> set. +L<[perl #126593]|https://rt.perl.org/Ticket/Display.html?id=126593> -=item * +=back -L has been upgraded from version 1.58 to 1.59. +=head1 Documentation -=item * +=head2 Changes to Existing Documentation + +=head3 L -L has been upgraded from version 3.06 to 3.07. +=over 4 =item * -L has been upgraded from version 2.03 to 2.04. +The documentation of C has been revised to clarify valid inputs. -=item * +=back -L has been upgraded from version 1.48 to 1.49. +=head3 L + +=over 4 =item * -L has been upgraded from version 1.03 to 1.04. +The documentation of C now describes how C<$?> is affected. -=item * +=back -L has been upgraded from version 1.9727_02 to 1.9728. +=head3 L + +=over 4 =item * -L has been upgraded from version 1.30 to 1.31. +The documentation of C<$@> was reworded to clarify that it is not just for +syntax errors in C. +L<[perl #124034]|https://rt.perl.org/Ticket/Display.html?id=124034> -=item * +=back + +=head3 L -L has been upgraded from version 1.21 to 1.23. +=over 4 =item * -L has been upgraded from version 0.20 to 0.21. +The documentation of C has been clarified; they are I +by default, not I. =back @@ -226,69 +282,85 @@ diagnostic messages, see L. =item * -L<<< Sequence (?... not terminated in regex; marked by S<<-- HERE> in mE%sE|perldiag/"Sequence (?... not terminated in regex; marked by <-- HERE in mE%sE" >>> +L + +(F) You supplied a number to the -C option that either has extra leading +zeroes or overflows perl's unsigned integer representation. =back -=head2 Changes to Existing Diagnostics +=head1 Configuration and Compilation =over 4 =item * -When running out of memory during an attempt the increase the stack -size, previously, perl would die using the cryptic message -C<< panic: av_extend_guts() negative count (-9223372036854775681) >>. -This has been fixed to show the prettier message: -L<< Out of memory during stack extend|perldiag/"Out of memory during %s extend" >> +F is no longer inappropriately silent. This was caused +by an operator precedence error introduced in 5.23.4. =back -=head1 Configuration and Compilation +=head1 Platform Support + +=head2 New Platforms =over 4 -=item * +=item OpenIndiana -C now acts as if the C<-O> option is always passed, allowing command -line options to override saved configuration. This should eliminate confusion -when command line options are ignored for no obvious reason. C<-O> is now -permitted, but ignored. +OpenIndiana (continuation of OpenSolaris) builds were not working due +to problems with the Perl shared library. This should be working now. +L<[perl #126958]|https://rt.perl.org/Ticket/Display.html?id=126958> -=item * +=back -Some filesystem stat symbols which were not used by the Perl core -were removed in an earlier commit. However, since these symbols -turned out to be used by at least one CPAN module, these symbols -have been restored. +=head2 Platform-Specific Notes -=item * +=over 4 -C<< PPPort.so/PPPort.dll >> no longer get installed, as they are -not used by C<< PPPort.pm >>, only by its test files. +=item EBCDIC platforms, such as z/OS -=item * +UTF-EBCDIC is like UTF-8, but for EBCDIC platforms. It now has been +extended so that it can represent code points up to 2 ** 64 - 1 on +platforms with 64-bit words. This brings it into parity with UTF-8. +This enhancement requires an incompatible change to the representation +of code points in the range 2 ** 30 to 2 ** 31 -1 (the latter was the +previous maximum representable code point). This means that a file that +contains one of these code points, written out with previous versions of +perl cannot be read in, without conversion, by a perl containing this +change. We do not believe any such files are in existence, but if you +do have one, submit a ticket at L, +and we will write a conversion script for you. -It is now possible to specify which compilation date to show on -C<< perl -V >> output, by setting the macro C<< PERL_BUILD_DATE >>. +=item Cygwin -=back +Tests are more robust against unusual cygdrive prefixes. +L<[perl #126834]|https://rt.perl.org/Ticket/Display.html?id=126834> -=head1 Platform Support +=item OS X/Darwin -=head2 Platform-Specific Notes +Builds with both -DDEBUGGING and threading enabled would fail with a +"panic: free from wrong pool" error when built or tested from Terminal +on OS X. This was caused by perl's internal management of the +environment conflicting with an atfork handler using the libc +setenv() function to update the environment. -=over 4 +Perl now uses setenv()/unsetenv() to update the environment on OS X. +L<[perl #126240]|https://rt.perl.org/Ticket/Display.html?id=126240> -=item Win32 +=item ppc64el floating point -Win32 does now a parallel build with C++. +The floating point format of ppc64el (Debian naming for little-endian +PowerPC) is now detected correctly. -=item Tru64 +=item Solaris -Workaround where Tru64 balks when prototypes are listed as -C<< PERL_STATIC_INLINE >>, but where the test is build with -C<< -DPERL_NO_INLINE_FUNCTIONS >>. +All Solaris now builds shared libperl. + +Solaris and variants like OpenIndiana now always build with the shared +Perl library (Configure -Duseshrplib). This was required for the +OpenIndiana builds, but this has also been the setting for Oracle/Sun +Perl builds for several years. =back @@ -298,7 +370,30 @@ C<< -DPERL_NO_INLINE_FUNCTIONS >>. =item * -C<< sv_ref() >> is now part of the API. +Perl core code and the threads extension have been annotated so that, +if Perl is configured to use threads, then during compile-time clang (3.6 +or later) will warn about suspicious uses of mutexes. +See L for more +information. + +=item * + +The C emulation has been enhanced. This will help older +and/or more exotic platforms or configurations. + +=item * + +The C function is discouraged in favor of C, +C, C, and C. + +=item * + +EBCDIC code paths have largely been unified to avoid repetition. + +=item * + +MSWin32 code for C<$^X> has been moved out of the F directory to +F, where other operating systems set that variable. =back @@ -308,69 +403,76 @@ C<< sv_ref() >> is now part of the API. =item * -C<\b{sb}> works much better. In Perl v5.22.0, this new construct didn't -seem to give the expected results, yet passed all the tests in the -extensive suite furnished by Unicode. It turns out that it was because -these were short input strings, and the failures had to do with longer -inputs. This was fixed in Perl 5.23.4, but the improvement was not -noticed until after that was released, so is included here now. +C no longer crashes on utf8 strings. When C<\G> is a fixed number +of characters from the start of the regex, perl needs to count back that +many characters from the current C position and start matching from +there. However, it was counting back bytes rather than characters, which +could lead to panics on utf8 strings. + +=item * + +In some cases operators that return integers would return negative +integers as large positive integers. +L<[perl #126635]|https://rt.perl.org/Ticket/Display.html?id=126635> =item * -Certain syntax errors in -L caused panics -instead of the proper error message. This has now been fixed. [perl -#126481] +The C operator would assert for DEBUGGING builds instead of +producing the correct error message. The condition asserted on is +detected and reported on correctly without the assertions, so the +assertions were removed. +L<[perl #126480]|https://rt.perl.org/Ticket/Display.html?id=126480> =item * -An earlier commit added a message when a quantifier in a regular -expression was useless, but then caused the parser to skip it; -this caused the surplus quantifier to be silently ignored, instead -of throwing an error. This is now fixed. [perl #126253] +In some cases, failing to parse a here-doc would attempt to use freed +memory. This was caused by a pointer not being restored correctly. +L<[perl #126443]|https://rt.perl.org/Ticket/Display.html?id=126443> =item * -The switch to building non-XS modules last in win32/makefile.mk (introduced -by design as part of the changes to enable parallel building) caused the -build of POSIX to break due to problems with the version module. This -is now fixed. +C<< @x = sort { *a = 0; $a <=> $b } 0 .. 1 >> no longer frees the GP +for *a before restoring its SV slot. +L<[perl #124097]|https://rt.perl.org/Ticket/Display.html?id=124097> =item * -Improved parsing of hex float constants. +Multiple problems with the new hexadecimal floating point printf +format C<%a> were fixed: +L<[perl #126582]|https://rt.perl.org/Ticket/Display.html?id=126582>, +L<[perl #126586]|https://rt.perl.org/Ticket/Display.html?id=126586>, +L<[perl #126822]|https://rt.perl.org/Ticket/Display.html?id=126822> =item * -Fixed an issue with C<< pack >> where C<< pack "H" >> (and C<< pack "h" >>) -could read past the source when given a non-utf8 source, and a utf8 target. -[perl #126325] +Calling mg_set() in leave_scope() no longer leaks. =item * -Fixed several cases where perl would abort due to a segmentation fault, -or a C-level assert. [perl #126615], [perl #126602], [perl #126193]. +A regression from Perl v5.20 was fixed in which debugging output of regular +expression compilation was wrong. (The pattern was correctly compiled, but +what got displayed for it was wrong.) =back =head1 Acknowledgements -Perl 5.23.5 represents approximately 4 weeks of development since Perl 5.23.4 -and contains approximately 12,000 lines of changes across 290 files from 23 +Perl 5.23.6 represents approximately 4 weeks of development since Perl 5.23.5 +and contains approximately 11,000 lines of changes across 260 files from 22 authors. Excluding auto-generated files, documentation and release tools, there were -approximately 6,400 lines of changes to 180 .pm, .t, .c and .h files. +approximately 7,500 lines of changes to 170 .pm, .t, .c and .h files. Perl continues to flourish into its third decade thanks to a vibrant community of users and developers. The following people are known to have contributed the -improvements that became Perl 5.23.5: +improvements that became Perl 5.23.6: -Aaron Crane, Abigail, Chris 'BinGOs' Williams, Craig A. Berry, Dagfinn Ilmari -Mannsåker, Daniel Dragan, David Mitchell, Dr.Ruud, H.Merijn Brand, Ivan -Pozdeev, James E Keenan, Jarkko Hietaniemi, Jerry D. Hedden, Karen Etheridge, -Karl Williamson, Lukas Mai, Mohammed El-Afifi, Niko Tyni, Peter Rabbitson, -Reini Urban, Ricardo Signes, Steve Hay, Tony Cook. +Aaron Crane, Abigail, Achim Gratz, Andy Broad, Aristotle Pagaltzis, Chris +'BinGOs' Williams, Craig A. Berry, Dagfinn Ilmari Mannsåker, Daniel Dragan, +David Golden, David Mitchell, Doug Bell, Ed Avis, Jarkko Hietaniemi, Karen +Etheridge, Karl Williamson, Lukas Mai, Ricardo Signes, Shlomi Fish, Steve Hay, +Sullivan Beck, Tony Cook. The list above is almost certainly incomplete as it is automatically generated from version control history. In particular, it does not include the names of diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 5111410..ae061f9 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1623,17 +1623,41 @@ This subroutine cannot be called. (F) You had a (sub-)template that ends with a '/'. There must be another template code following the slash. See L. +=item Code point 0x%X is not Unicode, and not portable + +(S non_unicode) You had a code point that has never been in any +standard, so it is likely that languages other than Perl will NOT +understand it. At one time, it was legal in some standards to have code +points up to 0x7FFF_FFFF, but not higher, and this code point is higher. + +Acceptance of these code points is a Perl extension, and you should +expect that nothing other than Perl can handle them; Perl itself on +EBCDIC platforms before v5.24 does not handle them. + +Code points above 0xFFFF_FFFF require larger than a 32 bit word. + +Perl also makes no guarantees that the representation of these code +points won't change at some point in the future, say when machines +become available that have larger than a 64-bit word. At that time, +files written by an older Perl would require conversion before being +readable by a newer Perl. + =item Code point 0x%X is not Unicode, may not be portable (S non_unicode) You had a code point above the Unicode maximum of U+10FFFF. -Perl allows strings to contain a superset of Unicode code points, up -to the limit of what is storable in an unsigned integer on your system, -but these may not be accepted by other languages/systems. At one time, -it was legal in some standards to have code points up to 0x7FFF_FFFF, -but not higher. Code points above 0xFFFF_FFFF require larger than a -32 bit word. +Perl allows strings to contain a superset of Unicode code points, but +these may not be accepted by other languages/systems. Further, even if +these languages/systems accept these large code points, they may have +chosen a different representation for them than the UTF-8-like one that +Perl has, which would mean files are not exchangeable between them and +Perl. + +On EBCDIC platforms, code points above 0x3FFF_FFFF have a different +representation in Perl v5.24 than before, so any file containing these +that was written before that version will require conversion before +being readable by a later Perl. =item %s: Command not found @@ -2597,12 +2621,6 @@ parent '%s' C3-consistent, and you have enabled the C3 MRO for this class. See the C3 documentation in L for more information. -=item In EBCDIC the v-string components cannot exceed 2147483647 - -(F) An error peculiar to EBCDIC. Internally, v-strings are stored as -Unicode code points, and encoded in EBCDIC as UTF-EBCDIC. The UTF-EBCDIC -encoding is limited to code points no larger than 2147483647 (0x7FFFFFFF). - =item Infinite recursion in regex (F) You used a pattern that references itself without consuming any input @@ -2639,6 +2657,11 @@ provides a list context to its subscript, which can do weird things if you're expecting only one subscript. When called in list context, it also returns the key in addition to the value. +=item Invalid number '%s' for -C option. + +(F) You supplied a number to the -C option that either has extra leading +zeroes or overflows perl's unsigned integer representation. + =item %s() is deprecated on :utf8 handles (W deprecated) The sysread(), recv(), syswrite() and send() operators @@ -6629,14 +6652,6 @@ is deprecated. See L. form if you wish to use an empty line as the terminator of the here-document. -=item Use of %s for non-UTF-8 locale is wrong. Assuming a UTF-8 locale - -(W locale) You are matching a regular expression using locale rules, -and the specified construct was encountered. This construct is only -valid for UTF-8 locales, which the current locale isn't. This doesn't -make sense. Perl will continue, assuming a Unicode (UTF-8) locale, but -the results are likely to be wrong. - =item Use of /c modifier is meaningless in s/// (W regexp) You used the /c modifier in a substitution. The /c @@ -6648,6 +6663,20 @@ modifier is not presently meaningful in substitutions. use the /g modifier. Currently, /c is meaningful only when /g is used. (This may change in the future.) +=item Use of code point 0x%s is deprecated; the permissible max is 0x%s + +(D deprecated) You used a code point that will not be allowed in a +future perl version, because it is too large. Unicode only allows code +points up to 0x10FFFF, but Perl allows much larger ones. However, the +largest possible ones break the perl interpreter in some constructs, +including causing it to hang in a few cases. The known problem areas +are in C, regular expression pattern matching using quantifiers, +and as the upper limits in loops. + +If your code is to run on various platforms, keep in mind that the upper +limit depends on the platform. It is much larger on 64-bit word sizes +than 32-bit ones. + =item Use of comma-less variable list is deprecated (D deprecated) The values you give to a format should be @@ -6669,6 +6698,14 @@ error, so C<:=> can be reclaimed as a new operator in the future. If you need an empty attribute list, for example in a code generator, add a space before the C<=>. +=item Use of %s for non-UTF-8 locale is wrong. Assuming a UTF-8 locale + +(W locale) You are matching a regular expression using locale rules, +and the specified construct was encountered. This construct is only +valid for UTF-8 locales, which the current locale isn't. This doesn't +make sense. Perl will continue, assuming a Unicode (UTF-8) locale, but +the results are likely to be wrong. + =item Use of freed value in iteration (F) Perhaps you modified the iterated array within the loop? @@ -6782,6 +6819,14 @@ operators and then you presumably know what you are doing. its behavior may change or even be removed in any future release of perl. See the explanation under L. +=item Use of strings with code points over 0xFF as arguments to %s +operator is deprecated + +(D deprecated) You tried to use one of the string bitwise operators +(C<&> or C<|> or C<^> or C<~>) on a string containing a code point over +0xFF. The string bitwise operators treat their operands as strings of +bytes, and values beyond 0xFF are nonsensical in this context. + =item Use of tainted arguments in %s is deprecated (W taint, deprecated) You have supplied C or C with multiple diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod index e54084a..552a8a3 100644 --- a/pod/perlebcdic.pod +++ b/pod/perlebcdic.pod @@ -243,15 +243,15 @@ In UTF-EBCDIC, there are 160 invariant characters. which have ASCII equivalents, plus those that correspond to the C1 controls (128 - 159 on ASCII platforms).) -A string encoded in UTF-EBCDIC may be longer (but never shorter) than -one encoded in UTF-8. Perl extends UTF-8 so that it can encode code -points above the Unicode maximum of U+10FFFF. It extends UTF-EBCDIC as -well, but due to the inherent limitations in UTF-EBCDIC, the maximum -code point expressible is U+7FFF_FFFF, even if the word size is more -than 32 bits. +A string encoded in UTF-EBCDIC may be longer (very rarely shorter) than +one encoded in UTF-8. Perl extends both UTF-8 and UTF-EBCDIC so that +they can encode code points above the Unicode maximum of U+10FFFF. Both +extensions are constructed to allow encoding of any code point that fits +in a 64-bit word. UTF-EBCDIC is defined by -L. +L +(often referred to as just TR16). It is defined based on CCSID 1047, not allowing for the differences for other code pages. This allows for easy interchange of text between computers running different code pages, but makes it unusable, without @@ -268,6 +268,11 @@ invariant. This means that text generated on a computer running one version of Perl's UTF-EBCDIC has to be translated to be intelligible to a computer running another. +TR16 implies a method to extend UTF-EBCDIC to encode points up through +S>. Perl uses this method for code points up through +S>, but uses an incompatible method for larger ones, to +enable it to handle much larger code points than otherwise. + =head2 Using Encode Starting from Perl 5.8 you can use the standard module Encode @@ -1226,10 +1231,6 @@ character return value on an EBCDIC platform. For example: $CAPITAL_LETTER_A = chr(193); -The largest code point that is representable in UTF-EBCDIC is -U+7FFF_FFFF. If you do C on a larger value, a runtime error -(similar to division by 0) will happen. - =item C C will return EBCDIC code number values on an EBCDIC platform. @@ -1264,10 +1265,6 @@ is true on all platforms. If you want native code points for the low will hold. -The largest code point that is representable in UTF-EBCDIC is -U+7FFF_FFFF. If you try to pack a larger value into a character, a -runtime error (similar to division by 0) will happen. - =item C One must be careful with scalars and strings that are passed to diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index f0a2abb..72e62a5 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -3028,18 +3028,23 @@ X X =item hex -=for Pod::Functions convert a string to a hexadecimal number +=for Pod::Functions convert a hexadecimal string to a number -Interprets EXPR as a hex string and returns the corresponding value. -(To convert strings that might start with either C<0>, C<0x>, or C<0b>, see -L.) If EXPR is omitted, uses C<$_>. +Interprets EXPR as a hex string and returns the corresponding numeric value. +If EXPR is omitted, uses C<$_>. print hex '0xAf'; # prints '175' print hex 'aF'; # same + $valid_input =~ /\A(?:0?[xX])?(?:_?[0-9a-fA-F])*\z/ + +A hex string consists of hex digits and an optional C<0x> or C prefix. +Each hex digit may be preceded by a single underscore, which will be ignored. +Any other character triggers a warning and causes the rest of the string +to be ignored (even leading whitespace, unlike L). +Only integers can be represented, and integer overflow triggers a warning. -Hex strings may only represent integers. Strings that would cause -integer overflow trigger a warning. Leading whitespace is not stripped, -unlike oct(). To present something as hex, look into L, +To convert strings that might start with any of C<0>, C<0x>, or C<0b>, see L. +To present something as hex, look into L, L, and L. =item import LIST @@ -6152,7 +6157,7 @@ use a real filehandle like C, not an indirect one like C<$fh>. This keyword is available only when the C<"say"> feature is enabled, or when prefixed with C; see -L. Alternately, include a C or later to the current +L. Alternately, add a C or later to the current scope. =item scalar EXPR diff --git a/pod/perlgit.pod b/pod/perlgit.pod index f5d4fec..524efcd 100644 --- a/pod/perlgit.pod +++ b/pod/perlgit.pod @@ -107,31 +107,33 @@ files, and in addition it will show things like what files have been staged for the next commit, and usually some useful information about how to change things. For instance the following: - $ git status - # On branch blead - # Your branch is ahead of 'origin/blead' by 1 commit. - # - # Changes to be committed: - # (use "git reset HEAD ..." to unstage) - # - # modified: pod/perlgit.pod - # - # Changed but not updated: - # (use "git add ..." to update what will be committed) - # - # modified: pod/perlgit.pod - # - # Untracked files: - # (use "git add ..." to include in what will be committed) - # - # deliberate.untracked + % git status + On branch blead + Your branch is ahead of 'origin/blead' by 1 commit. + + Changes to be committed: + (use "git reset HEAD ..." to unstage) + + modified: pod/perlgit.pod + + Changes not staged for commit: + (use "git add ..." to update what will be committed) + (use "git checkout -- ..." to discard changes in working + directory) + + modified: pod/perlgit.pod + + Untracked files: + (use "git add ..." to include in what will be committed) + + deliberate.untracked This shows that there were changes to this document staged for commit, and that there were further changes in the working directory not yet staged. It also shows that there was an untracked file in the working directory, and as you can see shows how to change all of this. It also shows that there is one commit on the working branch C which has -not been pushed to the C remote yet. B: that this output +not been pushed to the C remote yet. B: This output is also what you see as a template if you do not provide a message to C. @@ -177,35 +179,34 @@ to Orange Brocard, we should change his name in the AUTHORS file: You can see what files are changed: % git status - # On branch orange - # Changes to be committed: - # (use "git reset HEAD ..." to unstage) - # - # modified: AUTHORS - # + On branch orange + Changes to be committed: + (use "git reset HEAD ..." to unstage) + + modified: AUTHORS And you can see the changes: - % git diff - diff --git a/AUTHORS b/AUTHORS - index 293dd70..722c93e 100644 - --- a/AUTHORS - +++ b/AUTHORS - @@ -541,7 +541,7 @@ Lars Hecking - Laszlo Molnar - Leif Huhn - Len Johnson - -Leon Brocard - +Orange Brocard - Les Peters - Lesley Binks - Lincoln D. Stein + % git diff + diff --git a/AUTHORS b/AUTHORS + index 293dd70..722c93e 100644 + --- a/AUTHORS + +++ b/AUTHORS + @@ -541,7 +541,7 @@ Lars Hecking + Laszlo Molnar + Leif Huhn + Len Johnson + -Leon Brocard + +Orange Brocard + Les Peters + Lesley Binks + Lincoln D. Stein Now commit your change locally: - % git commit -a -m 'Rename Leon Brocard to Orange Brocard' - Created commit 6196c1d: Rename Leon Brocard to Orange Brocard - 1 files changed, 1 insertions(+), 1 deletions(-) + % git commit -a -m 'Rename Leon Brocard to Orange Brocard' + Created commit 6196c1d: Rename Leon Brocard to Orange Brocard + 1 files changed, 1 insertions(+), 1 deletions(-) The C<-a> option is used to include all files that git tracks that you have changed. If at this time, you only want to commit some of the @@ -225,20 +226,20 @@ Once you've finished writing your commit message and exited your editor, git will write your change to disk and tell you something like this: - Created commit daf8e63: explain git status and stuff about remotes - 1 files changed, 83 insertions(+), 3 deletions(-) + Created commit daf8e63: explain git status and stuff about remotes + 1 files changed, 83 insertions(+), 3 deletions(-) If you re-run C, you should see something like this: - % git status - # On branch blead - # Your branch is ahead of 'origin/blead' by 2 commits. - # - # Untracked files: - # (use "git add ..." to include in what will be committed) - # - # deliberate.untracked - nothing added to commit but untracked files present (use "git add" to track) + % git status + On branch orange + Untracked files: + (use "git add ..." to include in what will be committed) + + deliberate.untracked + + nothing added to commit but untracked files present (use "git add" to + track) When in doubt, before you do anything else, check your status and read it carefully, many questions are answered directly by the git status @@ -276,12 +277,12 @@ patch emails|/Sending patch emails> for more information. If you want to delete your temporary branch, you may do so with: - % git checkout blead - % git branch -d orange - error: The branch 'orange' is not an ancestor of your current HEAD. - If you are sure you want to delete it, run 'git branch -D orange'. - % git branch -D orange - Deleted branch orange. + % git checkout blead + % git branch -d orange + error: The branch 'orange' is not an ancestor of your current HEAD. + If you are sure you want to delete it, run 'git branch -D orange'. + % git branch -D orange + Deleted branch orange. =head2 Committing your changes @@ -303,20 +304,22 @@ Once you've finished writing your commit message and exited your editor, git will write your change to disk and tell you something like this: - Created commit daf8e63: explain git status and stuff about remotes - 1 files changed, 83 insertions(+), 3 deletions(-) + Created commit daf8e63: explain git status and stuff about remotes + 1 files changed, 83 insertions(+), 3 deletions(-) If you re-run C, you should see something like this: - % git status - # On branch blead - # Your branch is ahead of 'origin/blead' by 2 commits. - # - # Untracked files: - # (use "git add ..." to include in what will be committed) - # - # deliberate.untracked - nothing added to commit but untracked files present (use "git add" to track) + % git status + On branch blead + Your branch is ahead of 'origin/blead' by 2 commits. + (use "git push" to publish your local commits) + Untracked files: + (use "git add ..." to include in what will be committed) + + deliberate.untracked + + nothing added to commit but untracked files present (use "git add" to + track) When in doubt, before you do anything else, check your status and read it carefully, many questions are answered directly by the git status @@ -479,18 +482,18 @@ binary searches. Individual committers should create topic branches under B/B: - $ branch="$yourname/$some_descriptive_name" - $ git checkout -b $branch + % branch="$yourname/$some_descriptive_name" + % git checkout -b $branch ... do local edits, commits etc ... - $ git push origin -u $branch + % git push origin -u $branch Should you be stuck with an ancient version of git (prior to 1.7), then C will not have the C<-u> switch, and you have to replace the last step with the following sequence: - $ git push origin $branch:refs/heads/$branch - $ git config branch.$branch.remote origin - $ git config branch.$branch.merge refs/heads/$branch + % git push origin $branch:refs/heads/$branch + % git config branch.$branch.remote origin + % git config branch.$branch.merge refs/heads/$branch If you want to make changes to someone else's topic branch, you should check with its creator before making any change to it. @@ -520,20 +523,20 @@ in the git push documentation for details) after you have rebased your branch: # first rebase - $ git checkout $user/$topic - $ git fetch - $ git rebase origin/blead + % git checkout $user/$topic + % git fetch + % git rebase origin/blead # then "delete-and-push" - $ git push origin :$user/$topic - $ git push origin $user/$topic + % git push origin :$user/$topic + % git push origin $user/$topic B it is forbidden at the repository level to delete any of the "primary" branches. That is any branch matching C. Any attempt to do so will result in git producing an error like this: - $ git push origin :blead + % git push origin :blead *** It is forbidden to delete blead/maint branches in this repository error: hooks/update exited with error code 1 error: hook declined to update refs/heads/blead @@ -561,7 +564,7 @@ this is now impossible to fix in the public repository. You can remove this mis-merge locally by adding the following line to your C<.git/info/grafts> file: - 296f12bbbbaa06de9be9d09d3dcf8f4528898a49 434946e0cb7a32589ed92d18008aaa1d88515930 + 296f12bbbbaa06de9be9d09d3dcf8f4528898a49 434946e0cb7a32589ed92d18008aaa1d88515930 It is particularly important to have this graft line if any bisecting is done in the area of the "merge" in question. @@ -580,7 +583,7 @@ this once globally in their F<~/.gitconfig> by doing something like: % git config --global user.name "Ævar Arnfjörð Bjarmason" % git config --global user.email avarab@gmail.com -However, if you'd like to override that just for perl, +However, if you'd like to override that just for perl, execute something like the following in F: % git config user.email avar@cpan.org @@ -608,47 +611,48 @@ you should try out the patch. First we need to create a temporary new branch for these changes and switch into it: - % git checkout -b experimental + % git checkout -b experimental Patches that were formatted by C are applied with C: - % git am 0001-Rename-Leon-Brocard-to-Orange-Brocard.patch - Applying Rename Leon Brocard to Orange Brocard + % git am 0001-Rename-Leon-Brocard-to-Orange-Brocard.patch + Applying Rename Leon Brocard to Orange Brocard Note that some UNIX mail systems can mess with text attachments containing 'From '. This will fix them up: - % perl -pi -e's/^>From /From /' 0001-Rename-Leon-Brocard-to-Orange-Brocard.patch + % perl -pi -e's/^>From /From /' \ + 0001-Rename-Leon-Brocard-to-Orange-Brocard.patch If just a raw diff is provided, it is also possible use this two-step process: - % git apply bugfix.diff - % git commit -a -m "Some fixing" --author="That Guy " +% git apply bugfix.diff +% git commit -a -m "Some fixing" --author="That Guy " Now we can inspect the change: - % git show HEAD - commit b1b3dab48344cff6de4087efca3dbd63548ab5e2 - Author: Leon Brocard - Date: Fri Dec 19 17:02:59 2008 +0000 - - Rename Leon Brocard to Orange Brocard - - diff --git a/AUTHORS b/AUTHORS - index 293dd70..722c93e 100644 - --- a/AUTHORS - +++ b/AUTHORS - @@ -541,7 +541,7 @@ Lars Hecking - Laszlo Molnar - Leif Huhn - Len Johnson - -Leon Brocard - +Orange Brocard - Les Peters - Lesley Binks - Lincoln D. Stein + % git show HEAD + commit b1b3dab48344cff6de4087efca3dbd63548ab5e2 + Author: Leon Brocard + Date: Fri Dec 19 17:02:59 2008 +0000 + + Rename Leon Brocard to Orange Brocard + + diff --git a/AUTHORS b/AUTHORS + index 293dd70..722c93e 100644 + --- a/AUTHORS + +++ b/AUTHORS + @@ -541,7 +541,7 @@ Lars Hecking + Laszlo Molnar + Leif Huhn + Len Johnson + -Leon Brocard + +Orange Brocard + Les Peters + Lesley Binks + Lincoln D. Stein If you are a committer to Perl and you think the patch is good, you can then merge it into blead then push it out to the main repository: @@ -659,12 +663,13 @@ then merge it into blead then push it out to the main repository: If you want to delete your temporary branch, you may do so with: - % git checkout blead - % git branch -d experimental - error: The branch 'experimental' is not an ancestor of your current HEAD. - If you are sure you want to delete it, run 'git branch -D experimental'. - % git branch -D experimental - Deleted branch experimental. + % git checkout blead + % git branch -d experimental + error: The branch 'experimental' is not an ancestor of your current + HEAD. If you are sure you want to delete it, run 'git branch -D + experimental'. + % git branch -D experimental + Deleted branch experimental. =head2 Committing to blead @@ -718,19 +723,20 @@ Sometimes, blead will move while you're building or testing your changes. When this happens, your push will be rejected with a message like this: - To ssh://perl5.git.perl.org/perl.git - ! [rejected] blead -> blead (non-fast-forward) - error: failed to push some refs to 'ssh://perl5.git.perl.org/perl.git' - To prevent you from losing history, non-fast-forward updates were rejected - Merge the remote changes (e.g. 'git pull') before pushing again. See the - 'Note about fast-forwards' section of 'git push --help' for details. + To ssh://perl5.git.perl.org/perl.git + ! [rejected] blead -> blead (non-fast-forward) + error: failed to push some refs to 'ssh://perl5.git.perl.org/perl.git' + To prevent you from losing history, non-fast-forward updates were + rejected Merge the remote changes (e.g. 'git pull') before pushing + again. See the 'Note about fast-forwards' section of 'git push --help' + for details. When this happens, you can just I your work against the new position of blead, like this (assuming your remote for the master repository is "p5p"): - $ git fetch p5p - $ git rebase p5p/blead + % git fetch p5p + % git rebase p5p/blead You will see your commits being re-applied, and you will then be able to push safely. More information about rebasing can be found in the @@ -748,14 +754,14 @@ again, making it easier for future maintainers to see what has happened. Rebase as follows (assuming your work was on the branch C<< committer/somework >>): - $ git checkout committer/somework - $ git rebase blead + % git checkout committer/somework + % git rebase blead Then you can merge it into master like this: - $ git checkout blead - $ git merge --no-ff --no-commit committer/somework - $ git commit -a + % git checkout blead + % git merge --no-ff --no-commit committer/somework + % git commit -a The switches above deserve explanation. C<--no-ff> indicates that even if all your work can be applied linearly against blead, a merge commit @@ -888,9 +894,10 @@ Finally, you should then delete the remote smoke-me branch: (which is likely to produce a warning like this, which can be ignored: - remote: fatal: ambiguous argument 'refs/heads/smoke-me/tonyc/win32stat': - unknown revision or path not in the working tree. - remote: Use '--' to separate paths from revisions + remote: fatal: ambiguous argument + 'refs/heads/smoke-me/tonyc/win32stat': + unknown revision or path not in the working tree. + remote: Use '--' to separate paths from revisions ) and then delete your local branch: diff --git a/pod/perlhist.pod b/pod/perlhist.pod index 513a3ac..e798c2f 100644 --- a/pod/perlhist.pod +++ b/pod/perlhist.pod @@ -562,6 +562,9 @@ the strings?). Ricardo 5.22.0 2015-Jun-01 Steve 5.22.1-RC1 2015-Oct-31 Steve 5.22.1-RC2 2015-Nov-15 + Steve 5.22.1-RC3 2015-Dec-02 + Steve 5.22.1-RC4 2015-Dec-08 + Steve 5.22.1 2015-Dec-13 Ricardo 5.23.0 2015-Jun-20 The 5.23 development track Matthew 5.23.1 2015-Jul-20 @@ -569,6 +572,7 @@ the strings?). Peter 5.23.3 2015-Sep-20 Steve 5.23.4 2015-Oct-20 Abigail 5.23.5 2015-Nov-20 + David G 5.23.6 2015-Dec-21 =head2 SELECTED RELEASE SIZES diff --git a/pod/perlop.pod b/pod/perlop.pod index 1691614..50ee6e0 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -2309,6 +2309,21 @@ when they're the right way to get something done. Perl was made to be a glue language, and one of the things it glues together is commands. Just understand what you're getting yourself into. +Like C, backticks put the child process exit code in C<$?>. +If you'd like to manually inspect failure, you can check all possible +failure modes by inspecting C<$?> like this: + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + See L for more discussion. =item C/> diff --git a/pod/perlpodspec.pod b/pod/perlpodspec.pod index f8d1901..69a83c3 100644 --- a/pod/perlpodspec.pod +++ b/pod/perlpodspec.pod @@ -906,7 +906,7 @@ character 34 (doublequote, "), "EEamp>" for character 38 Note that in all cases of "EEwhateverE", I (whether an htmlname, or a number in any base) must consist only of -alphanumeric characters -- that is, I must watch +alphanumeric characters -- that is, I must match C. So S<"EE 0 1 2 3 E"> is invalid, because it contains spaces, which aren't alphanumeric characters. This presumably does not I special treatment by a Pod processor; diff --git a/pod/perlport.pod b/pod/perlport.pod index 8e872e4..031b2b1 100644 --- a/pod/perlport.pod +++ b/pod/perlport.pod @@ -242,9 +242,6 @@ C (included as of Perl 5.8). Keeping all data as text significantly simplifies matters. -The v-strings are portable only up to v2147483647 (0x7FFF_FFFF), that's -how far EBCDIC, or more precisely UTF-EBCDIC will go. - =head2 Files and Filesystems Most platforms these days structure files in a hierarchical fashion. diff --git a/pod/perlre.pod b/pod/perlre.pod index e45e444..08c98eb 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -1242,48 +1242,48 @@ Not doing so may lead to surprises: The problem here is that both the group named C<< a >> and the group named C<< b >> are aliases for the group belonging to C<< $1 >>. -=item Look-Around Assertions +=item Lookaround Assertions X X X X -Look-around assertions are zero-width patterns which match a specific +Lookaround assertions are zero-width patterns which match a specific pattern without including it in C<$&>. Positive assertions match when their subpattern matches, negative assertions match when their subpattern -fails. Look-behind matches text up to the current match position, -look-ahead matches text following the current match position. +fails. Lookbehind matches text up to the current match position, +lookahead matches text following the current match position. =over 4 =item C<(?=pattern)> X<(?=)> X X -A zero-width positive look-ahead assertion. For example, C +A zero-width positive lookahead assertion. For example, C matches a word followed by a tab, without including the tab in C<$&>. =item C<(?!pattern)> X<(?!)> X X -A zero-width negative look-ahead assertion. For example C +A zero-width negative lookahead assertion. For example C matches any occurrence of "foo" that isn't followed by "bar". Note -however that look-ahead and look-behind are NOT the same thing. You cannot -use this for look-behind. +however that lookahead and lookbehind are NOT the same thing. You cannot +use this for lookbehind. If you are looking for a "bar" that isn't preceded by a "foo", C will not do what you want. That's because the C<(?!foo)> is just saying that the next thing cannot be "foo"--and it's not, it's a "bar", so "foobar" will -match. Use look-behind instead (see below). +match. Use lookbehind instead (see below). =item C<(?<=pattern)> C<\K> X<(?<=)> X X X<\K> -A zero-width positive look-behind assertion. For example, C +A zero-width positive lookbehind assertion. For example, C matches a word that follows a tab, without including the tab in C<$&>. -Works only for fixed-width look-behind. +Works only for fixed-width lookbehind. There is a special form of this construct, called C<\K> (available since Perl 5.10.0), which causes the regex engine to "keep" everything it had matched prior to the C<\K> and not include it in C<$&>. This effectively provides variable-length -look-behind. The use of C<\K> inside of another look-around assertion +lookbehind. The use of C<\K> inside of another lookaround assertion is allowed, but the behaviour is currently not well defined. For various reasons C<\K> may be significantly more efficient than the @@ -1300,9 +1300,9 @@ can be rewritten as the much more efficient =item C<(? X<(? X X -A zero-width negative look-behind assertion. For example C +A zero-width negative lookbehind assertion. For example C matches any occurrence of "foo" that does not follow "bar". Works -only for fixed-width look-behind. +only for fixed-width lookbehind. =back @@ -1653,7 +1653,7 @@ C<(condition)> should be one of: (which is valid if the corresponding pair of parentheses matched); -=item a look-ahead/look-behind/evaluate zero-width assertion; +=item a lookahead/lookbehind/evaluate zero-width assertion; =item a name in angle brackets or single quotes @@ -1839,7 +1839,7 @@ the C pragma or B<-w> switch saying it C<"matches null string many times in regex">. On simple groups, such as the pattern C<< (?> [^()]+ ) >>, a comparable -effect may be achieved by negative look-ahead, as in C<[^()]+ (?! [^()] )>. +effect may be achieved by negative lookahead, as in C<[^()]+ (?! [^()] )>. This was only 4 times slower on a string with 1000000 Cs. The "grab all you can, and do not give anything back" semantic is desirable @@ -2242,7 +2242,7 @@ definition might succeed against a particular string. And if there are multiple ways it might succeed, you need to understand backtracking to know which variety of success you will achieve. -When using look-ahead assertions and negations, this can all get even +When using lookahead assertions and negations, this can all get even trickier. Imagine you'd like to find a sequence of non-digits not followed by "123". You might try to write that as @@ -2292,7 +2292,7 @@ time. Now there's indeed something following "AB" that is not We can deal with this by using both an assertion and a negation. We'll say that the first part in C<$1> must be followed both by a digit -and by something that's not "123". Remember that the look-aheads +and by something that's not "123". Remember that the lookaheads are zero-width expressions--they only look, but don't consume any of the string in their match. So rewriting this way produces what you'd expect; that is, case 5 will fail, but case 6 succeeds: @@ -2329,10 +2329,10 @@ match takes a long time to finish. A powerful tool for optimizing such beasts is what is known as an "independent group", which does not backtrack (see Lpattern) >>>). Note also that -zero-length look-ahead/look-behind assertions will not backtrack to make +zero-length lookahead/lookbehind assertions will not backtrack to make the tail match, since they are in "logical" context: only whether they match is considered relevant. For an example -where side-effects of look-ahead I have influenced the +where side-effects of lookahead I have influenced the following match, see Lpattern) >>>. =head2 Version 8 Regular Expressions diff --git a/pod/perlref.pod b/pod/perlref.pod index e570b72..5804c17 100644 --- a/pod/perlref.pod +++ b/pod/perlref.pod @@ -887,7 +887,7 @@ for obfuscated code: # @harry is (1,2,3) my $type = ref $thingy; - ($type ? $type == 'ARRAY' ? \@foo : \$bar : $baz) = $thingy; + ($type ? $type eq 'ARRAY' ? \@foo : \$bar : $baz) = $thingy; The C loop can also take a reference constructor for its loop variable, though the syntax is limited to one of the following, with an @@ -906,7 +906,7 @@ arrays-of-arrays, or arrays-of-hashes: } foreach \my %h (@array_of_hashes) { - $h{gelastic}++ if $h{type} == 'funny'; + $h{gelastic}++ if $h{type} eq 'funny'; } B Aliasing does not work correctly with closures. If you try to diff --git a/pod/perlreref.pod b/pod/perlreref.pod index e9b784e..db7c173 100644 --- a/pod/perlreref.pod +++ b/pod/perlreref.pod @@ -252,10 +252,10 @@ There is no quantifier C<{,n}>. That's interpreted as a literal string. (?P>name) Recurse into a named subpattern (python syntax) (?(cond)yes|no) (?(cond)yes) Conditional expression, where "cond" can be: - (?=pat) look-ahead - (?!pat) negative look-ahead - (?<=pat) look-behind - (?) named subpattern has matched something ('name') named subpattern has matched something diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index a407faf..eb23d55 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -1110,7 +1110,7 @@ feature, you can use one of the following: =item * -Regular expression look-ahead +Regular expression lookahead You can mimic class subtraction using lookahead. For example, what UTS#18 might write as @@ -1223,7 +1223,7 @@ Level 3 - Tailored Support [17] see UAX#10 "Unicode Collation Algorithms" [18] have Unicode::Collate but not integrated to regexes - [19] have (?<=x) and (?=x), but look-aheads or look-behinds + [19] have (?<=x) and (?=x), but lookaheads or lookbehinds should see outside of the target substring [20] need insensitive matching for linguistic features other than case; for example, hiragana to katakana, wide and @@ -1289,7 +1289,10 @@ encoding of numbers up to C<0x7FFF_FFFF>. Perl continues to allow those, and has extended that up to 13 bytes to encode code points up to what can fit in a 64-bit word. However, Perl will warn if you output any of these as being non-portable; and under strict UTF-8 input protocols, -they are forbidden. +they are forbidden. In addition, it is deprecated to use a code point +larger than what a signed integer variable on your system can hold. On +32-bit ASCII systems, this means C<0x7FFF_FFFF> is the legal maximum +going forward (much higher on 64-bit systems). =item * @@ -1300,10 +1303,11 @@ This means that all the basic characters (which includes all those that have ASCII equivalents (like C<"A">, C<"0">, C<"%">, I) are the same in both EBCDIC and UTF-EBCDIC.) -UTF-EBCDIC is used on EBCDIC platforms. The largest Unicode code points -take 5 bytes to represent (instead of 4 in UTF-8), and Perl extends it -to a maximum of 7 bytes to encode pode points up to what can fit in a -32-bit word (instead of 13 bytes and a 64-bit word in UTF-8). +UTF-EBCDIC is used on EBCDIC platforms. It generally requires more +bytes to represent a given code point than UTF-8 does; the largest +Unicode code points take 5 bytes to represent (instead of 4 in UTF-8), +and, extended for 64-bit words, it uses 14 bytes instead of 13 bytes in +UTF-8. =item * diff --git a/pod/perlvar.pod b/pod/perlvar.pod index f5922ad..09ec06d 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -1667,12 +1667,12 @@ Under a few operating systems, C<$^E> may contain a more verbose error indicator, such as in this case, "CDROM tray not closed." Systems that do not support extended error messages leave C<$^E> the same as C<$!>. -Finally, C<$?> may be set to non-0 value if the external program +Finally, C<$?> may be set to a non-0 value if the external program F fails. The upper eight bits reflect specific error conditions encountered by the program (the program's C value). The lower eight bits reflect mode of failure, like signal death and core dump information. See L for details. In contrast to -C<$!> and C<$^E>, which are set only if error condition is detected, +C<$!> and C<$^E>, which are set only if an error condition is detected, the variable C<$?> is set on each C or pipe C, overwriting the old value. This is more like C<$@>, which on every C is always set on failure and cleared on success. @@ -1867,17 +1867,18 @@ Mnemonic: similar to B and B. =item $@ X<$@> X<$EVAL_ERROR> -The Perl syntax error message from the -last C operator. If C<$@> is -the null string, the last C parsed and executed correctly -(although the operations you invoked may have failed in the normal -fashion). +The Perl error from the last C operator, i.e. the last exception that +was caught. For C, this is either a runtime error message or the +string or reference C was called with. The C form also +catches syntax errors and other compile time exceptions. + +If no error occurs, C sets C<$@> to the empty string. Warning messages are not collected in this variable. You can, however, set up a routine to process warnings by setting C<$SIG{__WARN__}> as described in L. -Mnemonic: Where was the syntax error "at"? +Mnemonic: Where was the error "at"? =back diff --git a/pp.c b/pp.c index 7301d8b..7071478 100644 --- a/pp.c +++ b/pp.c @@ -72,19 +72,22 @@ PP(pp_padav) if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) )) SAVECLEARSV(PAD_SVl(PL_op->op_targ)); EXTEND(SP, 1); + if (PL_op->op_flags & OPf_REF) { PUSHs(TARG); RETURN; - } else if (PL_op->op_private & OPpMAYBE_LVSUB) { - const I32 flags = is_lvalue_sub(); - if (flags && !(flags & OPpENTERSUB_INARGS)) { - if (GIMME_V == G_SCALAR) - /* diag_listed_as: Can't return %s to lvalue scalar context */ - Perl_croak(aTHX_ "Can't return array to lvalue scalar context"); - PUSHs(TARG); - RETURN; + } + else if (PL_op->op_private & OPpMAYBE_LVSUB) { + const I32 flags = is_lvalue_sub(); + if (flags && !(flags & OPpENTERSUB_INARGS)) { + if (GIMME_V == G_SCALAR) + /* diag_listed_as: Can't return %s to lvalue scalar context */ + Perl_croak(aTHX_ "Can't return array to lvalue scalar context"); + PUSHs(TARG); + RETURN; } } + gimme = GIMME_V; if (gimme == G_ARRAY) { /* XXX see also S_pushav in pp_hot.c */ @@ -125,17 +128,19 @@ PP(pp_padhv) if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO )) if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) )) SAVECLEARSV(PAD_SVl(PL_op->op_targ)); + if (PL_op->op_flags & OPf_REF) RETURN; else if (PL_op->op_private & OPpMAYBE_LVSUB) { - const I32 flags = is_lvalue_sub(); - if (flags && !(flags & OPpENTERSUB_INARGS)) { - if (GIMME_V == G_SCALAR) - /* diag_listed_as: Can't return %s to lvalue scalar context */ - Perl_croak(aTHX_ "Can't return hash to lvalue scalar context"); - RETURN; - } + const I32 flags = is_lvalue_sub(); + if (flags && !(flags & OPpENTERSUB_INARGS)) { + if (GIMME_V == G_SCALAR) + /* diag_listed_as: Can't return %s to lvalue scalar context */ + Perl_croak(aTHX_ "Can't return hash to lvalue scalar context"); + RETURN; + } } + gimme = GIMME_V; if (gimme == G_ARRAY) { RETURNOP(Perl_do_kv(aTHX)); @@ -143,7 +148,8 @@ PP(pp_padhv) else if ((PL_op->op_private & OPpTRUEBOOL || ( PL_op->op_private & OPpMAYBE_TRUEBOOL && block_gimme() == G_VOID )) - && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied))) + && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied)) + ) SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : sv_2mortal(newSViv(0))); else if (gimme == G_SCALAR) { SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG)); @@ -1367,7 +1373,7 @@ PP(pp_multiply) goto do_iv; SP--; result = nl * nr; -# if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN && NVSIZE == 16 +# if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16 if (Perl_isinf(result)) { Zero((U8*)&result + 8, 8, U8); } @@ -1499,7 +1505,7 @@ PP(pp_multiply) NV result = left * right; (void)POPs; -#if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN && NVSIZE == 16 +#if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16 if (Perl_isinf(result)) { Zero((U8*)&result + 8, 8, U8); } @@ -2636,6 +2642,8 @@ S_scomplement(pTHX_ SV *targ, SV *sv) U8 *result; U8 *p; + Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), + deprecated_above_ff_msg, PL_op_desc[PL_op->op_type]); Newx(result, targlen + 1, U8); p = result; while (tmps < send) { diff --git a/pp.h b/pp.h index 687b0ca..e3222e4 100644 --- a/pp.h +++ b/pp.h @@ -56,35 +56,46 @@ Refetch the stack pointer. Used after a callback. See L. #define TARG targ #if defined(DEBUGGING) && defined(PERL_USE_GCC_BRACE_GROUPS) -# define PUSHMARK(p) \ + +# define PUSHMARK(p) \ STMT_START { \ I32 * mark_stack_entry; \ - if (UNLIKELY((mark_stack_entry = ++PL_markstack_ptr) == PL_markstack_max)) \ + if (UNLIKELY((mark_stack_entry = ++PL_markstack_ptr) \ + == PL_markstack_max)) \ mark_stack_entry = markstack_grow(); \ *mark_stack_entry = (I32)((p) - PL_stack_base); \ - DEBUG_s(PerlIO_printf(Perl_debug_log, "MARK push %p %d\n", \ - PL_markstack_ptr, *mark_stack_entry)); \ + DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log, \ + "MARK push %p %"IVdf"\n", \ + PL_markstack_ptr, (IV)*mark_stack_entry))); \ } STMT_END -# define TOPMARK \ + +# define TOPMARK \ ({ \ - DEBUG_s(PerlIO_printf(Perl_debug_log, "MARK top %p %d\n", \ - PL_markstack_ptr, *PL_markstack_ptr)); \ + DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log, \ + "MARK top %p %"IVdf"\n", \ + PL_markstack_ptr, (IV)*PL_markstack_ptr))); \ *PL_markstack_ptr; \ }) -# define POPMARK \ + +# define POPMARK \ ({ \ - DEBUG_s(PerlIO_printf(Perl_debug_log, "MARK pop %p %d\n", \ - (PL_markstack_ptr-1), *(PL_markstack_ptr-1))); \ + DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log, \ + "MARK pop %p %"IVdf"\n", \ + (PL_markstack_ptr-1), (IV)*(PL_markstack_ptr-1)))); \ assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");\ *PL_markstack_ptr--; \ }) -# define INCMARK \ + +# define INCMARK \ ({ \ - DEBUG_s(PerlIO_printf(Perl_debug_log, "MARK inc %p %d\n", \ - (PL_markstack_ptr+1), *(PL_markstack_ptr+1))); \ + DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log, \ + "MARK inc %p %"IVdf"\n", \ + (PL_markstack_ptr+1), (IV)*(PL_markstack_ptr+1)))); \ *PL_markstack_ptr++; \ }) + #else + # define PUSHMARK(p) \ STMT_START { \ I32 * mark_stack_entry; \ @@ -377,7 +388,7 @@ Does not use C. See also C>, C> and C>. STMT_START { \ IV TARGi_iv = i; \ if (LIKELY( \ - ((SvFLAGS(TARG) & (SVTYPEMASK|SVf_THINKFIRST)) == SVt_IV) \ + ((SvFLAGS(TARG) & (SVTYPEMASK|SVf_THINKFIRST|SVf_IVisUV)) == SVt_IV) \ & (do_taint ? !TAINT_get : 1))) \ { \ /* Cheap SvIOK_only(). \ @@ -399,7 +410,7 @@ Does not use C. See also C>, C> and C>. STMT_START { \ UV TARGu_uv = u; \ if (LIKELY( \ - ((SvFLAGS(TARG) & (SVTYPEMASK|SVf_THINKFIRST)) == SVt_IV) \ + ((SvFLAGS(TARG) & (SVTYPEMASK|SVf_THINKFIRST|SVf_IVisUV)) == SVt_IV) \ & (do_taint ? !TAINT_get : 1) \ & (TARGu_uv <= (UV)IV_MAX))) \ { \ diff --git a/pp_sort.c b/pp_sort.c index 64a67d8..51742f6 100644 --- a/pp_sort.c +++ b/pp_sort.c @@ -1657,6 +1657,13 @@ PP(pp_sort) PL_secondgv = MUTABLE_GV(SvREFCNT_inc( gv_fetchpvs("b", GV_ADD|GV_NOTQUAL, SVt_PV) )); + /* make sure the GP isn't removed out from under us for + * the SAVESPTR() */ + save_gp(PL_firstgv, 0); + save_gp(PL_secondgv, 0); + /* we don't want modifications localized */ + GvINTRO_off(PL_firstgv); + GvINTRO_off(PL_secondgv); SAVESPTR(GvSV(PL_firstgv)); SAVESPTR(GvSV(PL_secondgv)); } diff --git a/pp_sys.c b/pp_sys.c index 373590f..15b4d8b 100644 --- a/pp_sys.c +++ b/pp_sys.c @@ -695,8 +695,6 @@ PP(pp_pipe_op) GV * const wgv = MUTABLE_GV(POPs); GV * const rgv = MUTABLE_GV(POPs); - assert (isGV_with_GP(rgv)); - assert (isGV_with_GP(wgv)); rstio = GvIOn(rgv); if (IoIFP(rstio)) do_close(rgv, FALSE); diff --git a/proto.h b/proto.h index b017eb0..9fb3ead 100644 --- a/proto.h +++ b/proto.h @@ -4853,6 +4853,9 @@ PERL_CALLCONV void Perl__invlist_dump(pTHX_ PerlIO *file, I32 level, const char* assert(file); assert(indent); assert(invlist) #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C) +PERL_CALLCONV bool Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b); +#define PERL_ARGS_ASSERT__INVLISTEQ \ + assert(a); assert(b) PERL_CALLCONV SV* Perl__new_invlist_C_array(pTHX_ const UV* const list) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT__NEW_INVLIST_C_ARRAY \ @@ -5314,6 +5317,9 @@ STATIC bool S_isa_lookup(pTHX_ HV *stash, const char * const name, STRLEN len, U assert(stash); assert(name) #endif #if defined(PERL_IN_UTF8_C) +STATIC UV S__to_utf8_case(pTHX_ const UV uv1, const U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, const char *normal, const char *special); +#define PERL_ARGS_ASSERT__TO_UTF8_CASE \ + assert(p); assert(ustrp); assert(swashp); assert(normal) STATIC UV S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* const ustrp, STRLEN *lenp) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_CHECK_LOCALE_BOUNDARY_CROSSING \ diff --git a/regcharclass.h b/regcharclass.h index 22b371d..30e9133 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -2514,9 +2514,9 @@ * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt - * 2a113118f00b0a9ad6a12eb55b8341a332d8547a8841df2377e938e0fcd1b967 lib/unicore/mktables + * 8e23f7adafce8ef1aadbbb3f1e942c14f5d5c8318599cae7ed0ad555e60d4639 lib/unicore/mktables * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version - * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl + * 996abda3c0fbc2bfd575092af09e3b9b0331e624eb2e969a268457f8fd31ecbb regen/charset_translations.pl * d9c04ac46bdd81bb3e26519f2b8eb6242cb12337205add3f7cf092b0c58dccc4 regen/regcharclass.pl * 393f8d882713a3ba227351ad0f00ea4839fda74fcf77dcd1cdf31519925adba5 regen/regcharclass_multi_char_folds.pl * ex: set ro: */ diff --git a/regcomp.c b/regcomp.c index da6eb16..8474e82 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1308,7 +1308,8 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, else { anded_flags = ANYOF_FLAGS(and_with) &( ANYOF_COMMON_FLAGS - |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER); + |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER + |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP); } } @@ -1463,7 +1464,8 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, if (OP(or_with) != ANYOFD) { ored_flags |= ANYOF_FLAGS(or_with) - & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER; + & ( ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER + |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP); } } @@ -1665,7 +1667,8 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc) * by the time we reach here */ assert(! (ANYOF_FLAGS(ssc) & ~( ANYOF_COMMON_FLAGS - |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))); + |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER + |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP))); populate_ANYOF_from_invlist( (regnode *) ssc, &invlist); @@ -3364,6 +3367,14 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour * The adjacent nodes actually may be separated by NOTHING-kind nodes, and * these get optimized out * + * XXX khw thinks this should be enhanced to fill EXACT (at least) nodes as full + * as possible, even if that means splitting an existing node so that its first + * part is moved to the preceeding node. This would maximise the efficiency of + * memEQ during matching. Elsewhere in this file, khw proposes splitting + * EXACTFish nodes into portions that don't change under folding vs those that + * do. Those portions that don't change may be the only things in the pattern that + * could be used to find fixed and floating strings. + * * If a node is to match under /i (folded), the number of characters it matches * can be different than its character length if it contains a multi-character * fold. *min_subtract is set to the total delta number of characters of the @@ -9378,9 +9389,9 @@ Perl__load_PL_utf8_foldclosures (pTHX) } #endif -#ifdef PERL_ARGS_ASSERT__INVLISTEQ +#if defined(PERL_ARGS_ASSERT__INVLISTEQ) && !defined(PERL_IN_XSUB_RE) bool -S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b) +Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b) { /* Return a boolean as to if the two passed in inversion lists are * identical. The final argument, if TRUE, says to take the complement of @@ -9979,7 +9990,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_parse++; paren = *RExC_parse++; - ret = NULL; /* For look-ahead/behind. */ + ret = NULL; /* For lookahead/behind. */ switch (paren) { case 'P': /* (?P...) variants for those used to PCRE/Python */ @@ -12357,7 +12368,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) reparse: /* We look for the EXACTFish to EXACT node optimizaton only if - * folding. (And we don't need to figure this out until pass 2) */ + * folding. (And we don't need to figure this out until pass 2). + * XXX It might actually make sense to split the node into portions + * that are exact and ones that aren't, so that we could later use + * the exact ones to find the longest fixed and floating strings. + * One would want to join them back into a larger node. One could + * use a pseudo regnode like 'EXACT_ORIG_FOLD' */ maybe_exact = FOLD && PASS2; /* XXX The node can hold up to 255 bytes, yet this only goes to @@ -12746,14 +12762,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) goto not_fold_common; } else /* A regular FOLD code point */ - if (! ( UTF + if (! ( UTF #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \ || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \ || UNICODE_DOT_DOT_VERSION > 0) - /* See comments for join_exact() as to why we fold this - * non-UTF at compile time */ - || (node_type == EXACTFU - && ender == LATIN_SMALL_LETTER_SHARP_S) + /* See comments for join_exact() as to why we fold + * this non-UTF at compile time */ + || ( node_type == EXACTFU + && ender == LATIN_SMALL_LETTER_SHARP_S) #endif )) { /* Here, are folding and are not UTF-8 encoded; therefore @@ -13096,9 +13112,6 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr) if (end == UV_MAX && start <= NUM_ANYOF_CODE_POINTS) { ANYOF_FLAGS(node) |= ANYOF_MATCHES_ALL_ABOVE_BITMAP; } - else if (end >= NUM_ANYOF_CODE_POINTS) { - ANYOF_FLAGS(node) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES; - } /* Quit if are above what we should change */ if (start >= NUM_ANYOF_CODE_POINTS) { @@ -14369,8 +14382,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, bool has_user_defined_property = FALSE; /* inversion list of code points this node matches only when the target - * string is in UTF-8. (Because is under /d) */ - SV* depends_list = NULL; + * string is in UTF-8. These are all non-ASCII, < 256. (Because is under + * /d) */ + SV* has_upper_latin1_only_utf8_matches = NULL; /* Inversion list of code points this node matches regardless of things * like locale, folding, utf8ness of the target string */ @@ -14423,9 +14437,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, ret = reganode(pRExC_state, (LOC) ? ANYOFL - : (DEPENDS_SEMANTICS) - ? ANYOFD - : ANYOF, + : ANYOF, 0); if (SIZE_ONLY) { @@ -14779,15 +14791,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, optimizable = FALSE; /* Will have to leave this an ANYOF node */ - /* We don't know yet, so have to assume that the - * property could match something in the upper Latin1 - * range, hence something that isn't utf8. Note that - * this would cause things in to match - * inappropriately, except that any \p{}, including - * this one forces Unicode semantics, which means there - * is no */ - ANYOF_FLAGS(ret) - |= ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES; + /* We don't know yet what this matches, so have to flag + * it */ + ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP; } else { @@ -15785,9 +15791,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, PL_fold_latin1[j]); } else { - depends_list = - add_cp_to_invlist(depends_list, - PL_fold_latin1[j]); + has_upper_latin1_only_utf8_matches + = add_cp_to_invlist( + has_upper_latin1_only_utf8_matches, + PL_fold_latin1[j]); } } @@ -15851,8 +15858,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, else { /* Similarly folds involving non-ascii Latin1 * characters under /d are added to their list */ - depends_list = add_cp_to_invlist(depends_list, - c); + has_upper_latin1_only_utf8_matches + = add_cp_to_invlist( + has_upper_latin1_only_utf8_matches, + c); } } } @@ -15928,13 +15937,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, cp_list = posixes; } - if (depends_list) { - _invlist_union(depends_list, nonascii_but_latin1_properties, - &depends_list); + if (has_upper_latin1_only_utf8_matches) { + _invlist_union(has_upper_latin1_only_utf8_matches, + nonascii_but_latin1_properties, + &has_upper_latin1_only_utf8_matches); SvREFCNT_dec_NN(nonascii_but_latin1_properties); } else { - depends_list = nonascii_but_latin1_properties; + has_upper_latin1_only_utf8_matches + = nonascii_but_latin1_properties; } } } @@ -15948,8 +15959,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * class that isn't a Unicode property, and which matches above Unicode, \W * or [\x{110000}] for example. * (Note that in this case, unlike the Posix one above, there is no - * , because having a Unicode property forces Unicode - * semantics */ + * , because having a Unicode property + * forces Unicode semantics */ if (properties) { if (cp_list) { @@ -15998,7 +16009,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * locales, or the class matches at least one 0-255 range code point */ if (LOC && FOLD) { if (only_utf8_locale_list) { - ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD; + ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD + |ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES; } else if (cp_list) { /* Look to see if a 0-255 code point is in list */ UV start, end; @@ -16010,14 +16022,83 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, } } +#define MATCHES_ALL_NON_UTF8_NON_ASCII(ret) \ + ( DEPENDS_SEMANTICS \ + && ANYOF_FLAGS(ret) \ + & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER) + + /* See if we can simplify things under /d */ + if ( has_upper_latin1_only_utf8_matches + || MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) + { + if (has_upper_latin1_only_utf8_matches) { + if (MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) { + + /* Here, we have two, almost opposite, constraints in effect + * for upper latin1 characters. The macro means they all match + * when the target string ISN'T in UTF-8. + * 'has_upper_latin1_only_utf8_matches' contains the chars that + * match only if the target string IS UTF-8. Therefore the + * ones in 'has_upper_latin1_only_utf8_matches' match + * regardless of UTF-8, so can be added to the regular list, + * and 'has_upper_latin1_only_utf8_matches' cleared */ + _invlist_union(cp_list, + has_upper_latin1_only_utf8_matches, + &cp_list); + SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches); + has_upper_latin1_only_utf8_matches = NULL; + } + else if (cp_list) { + + /* Here, 'cp_list' gives chars that always match, and + * 'has_upper_latin1_only_utf8_matches' gives chars that were + * specified to match only if the target string is in UTF-8. + * It may be that these overlap, so we can subtract the + * unconditionally matching from the conditional ones, to make + * the conditional list as small as possible, perhaps even + * clearing it, in which case more optimizations are possible + * later */ + _invlist_subtract(has_upper_latin1_only_utf8_matches, + cp_list, + &has_upper_latin1_only_utf8_matches); + if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) { + SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches); + has_upper_latin1_only_utf8_matches = NULL; + } + } + } + + /* Similarly, if the unconditional matches include every upper latin1 + * character, we can clear that flag to permit later optimizations */ + if (cp_list && MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) { + SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1); + _invlist_subtract(only_non_utf8_list, cp_list, &only_non_utf8_list); + if (_invlist_len(only_non_utf8_list) == 0) { + ANYOF_FLAGS(ret) &= ~ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER; + } + SvREFCNT_dec_NN(only_non_utf8_list); + only_non_utf8_list = NULL;; + } + + /* If we haven't gotten rid of all conditional matching, we change the + * regnode type to indicate that */ + if ( has_upper_latin1_only_utf8_matches + || MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) + { + OP(ret) = ANYOFD; + optimizable = FALSE; + } + } +#undef MATCHES_ALL_NON_UTF8_NON_ASCII + /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known * at compile time. Besides not inverting folded locale now, we can't * invert if there are things such as \w, which aren't known until runtime * */ if (cp_list && invert + && OP(ret) != ANYOFD && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS)) - && ! depends_list && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION) { _invlist_invert(cp_list); @@ -16059,16 +16140,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * adjacent such nodes. And if the class is equivalent to things like /./, * expensive run-time swashes can be avoided. Now that we have more * complete information, we can find things necessarily missed by the - * earlier code. I (khw) did some benchmarks and found essentially no - * speed difference between using a POSIXA node versus an ANYOF node, so - * there is no reason to optimize, for example [A-Za-z0-9_] into - * [[:word:]]/a (although if we did it in the sizing pass it would save - * space). _invlistEQ() could be used if one ever wanted to do something - * like this at this point in the code */ - - if (optimizable && cp_list && ! invert && ! depends_list) { + * earlier code. Another possible "optimization" that isn't done is that + * something like [Ee] could be changed into an EXACTFU. khw tried this + * and found that the ANYOF is faster, including for code points not in the + * bitmap. This still might make sense to do, provided it got joined with + * an adjacent node(s) to create a longer EXACTFU one. This could be + * accomplished by creating a pseudo ANYOF_EXACTFU node type that the join + * routine would know is joinable. If that didn't happen, the node type + * could then be made a straight ANYOF */ + + if (optimizable && cp_list && ! invert) { UV start, end; U8 op = END; /* The optimzation node-type */ + int posix_class = -1; /* Illegal value */ const char * cur_parse= RExC_parse; invlist_iterinit(cp_list); @@ -16151,6 +16235,49 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, } invlist_iterfinish(cp_list); + if (op == END) { + const UV cp_list_len = _invlist_len(cp_list); + const UV* cp_list_array = invlist_array(cp_list); + + /* Here, didn't find an optimization. See if this matches any of + * the POSIX classes. These run slightly faster for above-Unicode + * code points, so don't bother with POSIXA ones nor the 2 that + * have no above-Unicode matches. We can avoid these checks unless + * the ANYOF matches at least as high as the lowest POSIX one + * (which was manually found to be \v. The actual code point may + * increase in later Unicode releases, if a higher code point is + * assigned to be \v, but this code will never break. It would + * just mean we could execute the checks for posix optimizations + * unnecessarily) */ + + if (cp_list_array[cp_list_len-1] > 0x2029) { + for (posix_class = 0; + posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC; + posix_class++) + { + int try_inverted; + if (posix_class == _CC_ASCII || posix_class == _CC_CNTRL) { + continue; + } + for (try_inverted = 0; try_inverted < 2; try_inverted++) { + + /* Check if matches normal or inverted */ + if (_invlistEQ(cp_list, + PL_XPosix_ptrs[posix_class], + try_inverted)) + { + op = (try_inverted) + ? NPOSIXU + : POSIXU; + *flagp |= HASWIDTH|SIMPLE; + goto found_posix; + } + } + } + found_posix: ; + } + } + if (op != END) { RExC_parse = (char *)orig_parse; RExC_emit = (regnode *)orig_emit; @@ -16168,6 +16295,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, TRUE /* downgradable to EXACT */ ); } + else if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) { + FLAGS(ret) = posix_class; + } SvREFCNT_dec_NN(cp_list); return ret; @@ -16188,16 +16318,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, /* Here, the bitmap has been populated with all the Latin1 code points that * always match. Can now add to the overall list those that match only - * when the target string is UTF-8 (). */ - if (depends_list) { + * when the target string is UTF-8 (). + * */ + if (has_upper_latin1_only_utf8_matches) { if (cp_list) { - _invlist_union(cp_list, depends_list, &cp_list); - SvREFCNT_dec_NN(depends_list); + _invlist_union(cp_list, + has_upper_latin1_only_utf8_matches, + &cp_list); + SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches); } else { - cp_list = depends_list; + cp_list = has_upper_latin1_only_utf8_matches; } - ANYOF_FLAGS(ret) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES; + ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP; } /* If there is a swash and more than one element, we can't use the swash in @@ -16265,18 +16398,13 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, if (! cp_list && ! runtime_defns && ! only_utf8_locale_list) { assert(! (ANYOF_FLAGS(node) - & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES - |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES))); + & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)); ARG_SET(node, ANYOF_ONLY_HAS_BITMAP); } else { AV * const av = newAV(); SV *rv; - assert(ANYOF_FLAGS(node) - & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES - |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD)); - av_store(av, 0, (runtime_defns) ? SvREFCNT_inc(runtime_defns) : &PL_sv_undef); if (swash) { @@ -16340,10 +16468,6 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA; - assert(ANYOF_FLAGS(node) - & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES - |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD)); - if (data && data->count) { const U32 n = ARG(node); @@ -16355,9 +16479,6 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, si = *ary; /* ary[0] = the string to initialize the swash with */ - /* Elements 3 and 4 are either both present or both absent. [3] is - * any inversion list generated at compile time; [4] indicates if - * that inversion list has any user-defined properties in it. */ if (av_tindex(av) >= 2) { if (only_utf8_locale_ptr && ary[2] @@ -16370,6 +16491,10 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, *only_utf8_locale_ptr = NULL; } + /* Elements 3 and 4 are either both present or both absent. [3] + * is any inversion list generated at compile time; [4] + * indicates if that inversion list has any user-defined + * properties in it. */ if (av_tindex(av) >= 3) { invlist = ary[3]; if (SvUV(ary[4])) { @@ -17254,7 +17379,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ else if (k == ANYOF) { const U8 flags = ANYOF_FLAGS(o); int do_sep = 0; - SV* bitmap_invlist; /* Will hold what the bit map contains */ + SV* bitmap_invlist = NULL; /* Will hold what the bit map contains */ if (OP(o) == ANYOFL) { @@ -17288,10 +17413,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } } - if ((flags & (ANYOF_MATCHES_ALL_ABOVE_BITMAP - |ANYOF_HAS_UTF8_NONBITMAP_MATCHES - |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES - |ANYOF_LOC_FOLD))) + if ( ARG(o) != ANYOF_ONLY_HAS_BITMAP + || (flags + & ( ANYOF_MATCHES_ALL_ABOVE_BITMAP + |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP + |ANYOF_LOC_FOLD))) { if (do_sep) { Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]); @@ -17330,11 +17456,13 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ if (*s == '\n') { const char * const t = ++s; - if (flags & ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES) { - sv_catpvs(sv, "{outside bitmap}"); - } - else { - sv_catpvs(sv, "{utf8}"); + if (flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP) { + if (OP(o) == ANYOFD) { + sv_catpvs(sv, "{utf8}"); + } + else { + sv_catpvs(sv, "{outside bitmap}"); + } } if (byte_output) { @@ -18249,23 +18377,21 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist) int i; UV start, end; unsigned int punct_count = 0; - SV* invlist = NULL; - SV** invlist_ptr; /* Temporary, in case bitmap_invlist is NULL */ + SV* invlist; bool allow_literals = TRUE; + bool inverted_for_output = FALSE; PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS; - invlist_ptr = (bitmap_invlist) ? bitmap_invlist : &invlist; - /* Worst case is exactly every-other code point is in the list */ - *invlist_ptr = _new_invlist(NUM_ANYOF_CODE_POINTS / 2); + invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2); /* Convert the bit map to an inversion list, keeping track of how many * ASCII puncts are set, including an extra amount for the backslashed * ones. */ for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) { if (BITMAP_TEST(bitmap, i)) { - *invlist_ptr = add_cp_to_invlist(*invlist_ptr, i); + invlist = add_cp_to_invlist(invlist, i); if (isPUNCT_A(i)) { punct_count++; if isBACKSLASHED_PUNCT(i) { @@ -18276,8 +18402,8 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist) } /* Nothing to output */ - if (_invlist_len(*invlist_ptr) == 0) { - SvREFCNT_dec(invlist); + if (_invlist_len(invlist) == 0) { + SvREFCNT_dec_NN(invlist); return FALSE; } @@ -18285,8 +18411,8 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist) * literals, but if a range (nearly) spans all of them, it's best to output * it as a single range. This code will use a single range if all but 2 * printables are in it */ - invlist_iterinit(*invlist_ptr); - while (invlist_iternext(*invlist_ptr, &start, &end)) { + invlist_iterinit(invlist); + while (invlist_iternext(invlist, &start, &end)) { /* If range starts beyond final printable, it doesn't have any in it */ if (start > MAX_PRINT_A) { @@ -18309,7 +18435,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist) break; } } - invlist_iterfinish(*invlist_ptr); + invlist_iterfinish(invlist); /* The legibility of the output depends mostly on how many punctuation * characters are output. There are 32 possible ASCII ones, and some have @@ -18324,19 +18450,35 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist) /* Add everything remaining to the list, so when we invert it just * below, it will be excluded */ - _invlist_union_complement_2nd(*invlist_ptr, PL_InBitmap, invlist_ptr); - _invlist_invert(*invlist_ptr); + _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist); + _invlist_invert(invlist); + inverted_for_output = TRUE; } /* Here we have figured things out. Output each range */ - invlist_iterinit(*invlist_ptr); - while (invlist_iternext(*invlist_ptr, &start, &end)) { + invlist_iterinit(invlist); + while (invlist_iternext(invlist, &start, &end)) { if (start >= NUM_ANYOF_CODE_POINTS) { break; } put_range(sv, start, end, allow_literals); } - invlist_iterfinish(*invlist_ptr); + invlist_iterfinish(invlist); + + if (bitmap_invlist) { + + /* Here, wants the inversion list returned. If we inverted it, we have + * to restore it to the original */ + if (inverted_for_output) { + _invlist_invert(invlist); + _invlist_intersection(invlist, PL_InBitmap, &invlist); + } + + *bitmap_invlist = invlist; + } + else { + SvREFCNT_dec_NN(invlist); + } return TRUE; } diff --git a/regcomp.h b/regcomp.h index 0b69f6e..5c12a21 100644 --- a/regcomp.h +++ b/regcomp.h @@ -373,51 +373,130 @@ struct regnode_ssc { #define PASS1 SIZE_ONLY #define PASS2 (! SIZE_ONLY) -/* If the bitmap fully represents what this ANYOF node can match, the - * ARG is set to this special value (since 0, 1, ... are legal, but will never - * reach this high). */ +/* An ANYOF node is basically a bitmap with the index being a code point. If + * the bit for that code point is 1, the code point matches; if 0, it doesn't + * match (complemented if inverted). There is an additional mechanism to deal + * with cases where the bitmap is insufficient in and of itself. This #define + * indicates if the bitmap does fully represent what this ANYOF node can match. + * The ARG is set to this special value (since 0, 1, ... are legal, but will + * never reach this high). */ #define ANYOF_ONLY_HAS_BITMAP ((U32) -1) -/* Below are the flags for node->flags of ANYOF. These are in short supply, - * with none currently available. The ABOVE_BITMAP_ALL bit could be freed up - * by resorting to creating a swash containing everything above 255. This - * seems likely to introduce a performance penalty (but actual numbers haven't - * been done), so its probably better do some of the other possibilities below - * in preference to this. +/* When the bimap isn't completely sufficient for handling the ANYOF node, + * flags (in node->flags of the ANYOF node) get set to indicate this. These + * are perennially in short supply. Beyond several cases where warnings need + * to be raised under certain circumstances, currently, there are six cases + * where the bitmap alone isn't sufficient. We could use six flags to + * represent the 6 cases, but to save flags bits, we play some games. The + * cases are: * - * If just one bit is required, it seems to me (khw) that the best option would - * be to turn the ANYOF_LOC_REQ_UTF8 bit into a separate node type: a - * specialization of the ANYOFL type, freeing up the currently occupied bit. - * When turning a bit into a node type, one has to take into consideration that - * a SSC may use that bit -- not just a regular ANYOF[DL]?. In the case of - * ANYOF_LOC_REQ_UTF8, the only likely problem is accurately settting the SSC - * node-type to the new one, which would likely involve S_ssc_or and S_ssc_and, - * and not how the SSC currently gets set to ANYOFL. This bit is a natural - * candidate for being a separate node type because it is a specialization of - * the current ANYOFL, and because no other ANYOFL-only bits are set when it - * is; also most of its uses are actually outside the reginclass() function, so - * this could be done with no performance penalty. The other potential bits - * seem to me to have a potential issue with a combinatorial explosion of node - * types, because of not having that mutual exclusivity, where you may end up - * having to have a node type for bitX being set, one for bitY, and one for - * both bitXY. + * 1) The bitmap has a compiled-in very finite size. So something else needs + * to be used to specify if a code point that is too large for the bitmap + * actually matches. The mechanism currently is a swash or inversion + * list. ANYOF_ONLY_HAS_BITMAP, described above, being TRUE indicates + * there are no matches of too-large code points. But if it is FALSE, + * then almost certainly there are matches too large for the bitmap. (The + * other cases, described below, either imply this one or are extremely + * rare in practice.) So we can just assume that a too-large code point + * will need something beyond the bitmap if ANYOF_ONLY_HAS_BITMAP is + * FALSE, instead of having a separate flag for this. + * 2) A subset of item 1) is if all possible code points outside the bitmap + * match. This is a common occurrence when the class is complemented, + * like /[^ij]/. Therefore a bit is reserved to indicate this, + * ANYOF_MATCHES_ALL_ABOVE_BITMAP. If it became necessary, this bit could + * be replaced by using the normal swash mechanism, but with a performance + * penalty. + * 3) Under /d rules, it can happen that code points that are in the upper + * latin1 range (\x80-\xFF or their equivalents on EBCDIC platforms) match + * only if the runtime target string being matched against is UTF-8. For + * example /[\w[:punct:]]/d. This happens only for posix classes (with a + * couple of exceptions, like \d), and all such ones also have + * above-bitmap matches. Thus, 3) implies 1) as well. Note that /d rules + * are no longer encouraged; 'use 5.14' or higher deselects them. But a + * flag is required so that they can be properly handled. But it can be a + * shared flag: see 5) below. + * 4) Also under /d rules, something like /[\Wfoo] will match everything in + * the \x80-\xFF range, unless the string being matched against is UTF-8. + * A swash could be created for this case, but this is relatively common, + * and it turns out that it's all or nothing: if any one of these code + * points matches, they all do. Hence a single bit suffices. We use a + * shared bit that doesn't take up space by itself: + * ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER. + * This also implies 1), with one exception: [:^cntrl:]. + * 5) A user-defined \p{} property may not have been defined by the time the + * regex is compiled. In this case, we don't know until runtime what it + * will match, so we have to assume it could match anything, including + * code points that ordinarily would be in the bitmap. A flag bit is + * necessary to indicate this , though it can be shared with the item 3) + * flag, as that only occurs under /d, and this only occurs under non-d. + * This case is quite uncommon in the field, and the /(?[ ...])/ construct + * is a better way to accomplish what this feature does. This case also + * implies 1). + * ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP + * is the shared bit. + * 6) /[foo]/il may have folds that are only valid if the runtime locale is a + * UTF-8 one. These are quite rare, so it would be good to avoid the + * expense of looking for them. But /l matching is slow anyway, and we've + * traditionally not worried to much about its performance. And this + * condition requires the ANYOF_LOC_FOLD flag to be set, so testing for + * that flag would be sufficient to rule out most cases of this. So it is + * unclear if this should have a flag or not. But, one is currently + * allocated for this purpose, ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES (and the + * text below indicates how to share it, should another bit be needed). * - * If you don't want to do this, or two bits are required, one could instead - * rename the ANYOF_POSIXL bit to be ANYOFL_LARGE, to mean that the ANYOF node - * has an extra 32 bits beyond what a regular one does. That's what it - * effectively means now, with the extra space all for the POSIX class bits. - * But those classes actually only occupy 30 bits, so the ANYOF_LOC_REQ_BIT (if - * an extra node type for it hasn't been created) and/or the ANYOF_LOC_FOLD - * bits could be moved there. The downside of this is that ANYOFL nodes with - * whichever of the bits get moved would have to have the extra space always - * allocated. + * At the moment, there are no spare bits, but this could be changed by various + * tricks. Notice that item 6) is not independent of the ANYOF_LOC_FOLD flag + * below. Also, the ANYOF_LOC_REQ_UTF8 flag is set only if both these aren't. + * We can therefore use a 2-bit field to represent these 3 flags, as follows: + * 00 => ANYOF_LOC_REQ_UTF8 + * 01 => no folding + * 10 => ANYOF_LOC_FOLD alone + * 11 => ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES * - * If three bits are required, one could additionally make a node type for - * ANYOFL_LARGE, removing that as a bit, and move both the above bits to that - * extra word. There isn't an SSC problem as all SSCs are this large anyway, - * and the SSC could be set to this node type. REGINCLASS would have to be - * modified so that if the node type were this, it would call reginclass(). - * as the flag bit that does this now would be gone. + * Beyond that, note that the information may be conveyed by creating new + * regnode types. This is not the best solution, as shown later in this + * paragraph, but it is something that is feasible. We could have a regnode + * for ANYOF_INVERT, for example. A complication of this is that the regexec.c + * REGINCLASS macro assumes that it can just use the bitmap if no flags are + * set. This would have to be changed to add extra tests for the node type, or + * a special bit reserved that means unspecified special handling, and then the + * node-type would be used internally to sort that out. So we could gain a bit + * by having an ANYOF_SPECIAL bit, and a node type for INVERT, and another for + * POSIXL, and still another for INVERT_POSIXL. This example illustrates one + * problem with this, a combinatorial explosion of node types. The one node + * type khw can think of that doesn't have this explosion issue is + * ANYOF_LOC_REQ_UTF8, but you'd do this only if you haven't done the 2-bit + * field trick above. This bit is a natural candidate for being a separate + * node type because it is a specialization of the current ANYOFL, and because + * no other ANYOFL-only bits are set when it is; also most of its uses are + * actually outside the reginclass() function, so this could be done with no + * performance penalty. But again, the 2-bit field trick combines this bit so + * it doesn't take up space anyway. Another issue when turning a bit into a + * node type, is that a SSC may use that bit -- not just a regular ANYOF[DL]?. + * In the case of ANYOF_LOC_REQ_UTF8, the only likely problem is accurately + * settting the SSC node-type to the new one, which would likely involve + * S_ssc_or and S_ssc_and, and not how the SSC currently gets set to ANYOFL. + * + * Another possibility is to instead rename the ANYOF_POSIXL bit to be + * ANYOFL_LARGE, to mean that the ANYOF node has an extra 32 bits beyond what a + * regular one does. That's what it effectively means now, with the extra + * space all for the POSIX class bits. But those classes actually only occupy + * 30 bits, so the 2-bit field or 2 of the locale bits could be moved to that + * extra space. The downside of this is that ANYOFL nodes with whichever of + * the bits get moved would have to have the extra space always allocated. + * + * One could completely remove ANYOFL_LARGE and make all ANYOFL nodes large. + * The 30 bits in the extra word would indicate if a posix class should be + * looked up or not. There isn't an SSC problem as all SSCs are this large + * anyway, and the SSC could be set to this node type. REGINCLASS would have + * to be modified so that if the node type were this, it would call + * reginclass(), as the flag bit that indicates to do this now would be gone. + * If the 2-bit field is used and moved to the larger structure, this would + * free up a total of 4 bits. If this were done, we could create an + * ANYOF_INVERT node-type without a combinatorial explosion, getting us to 5 + * bits. And, keep in mind that ANYOF_MATCHES_ALL_ABOVE_BITMAP is solely for + * performance, so could be removed. The other performance-related bits are + * shareable with bits that are required. * * Several flags are not used in synthetic start class (SSC) nodes, so could be * shared should new flags be needed for SSCs, like SSC_MATCHES_EMPTY_STRING @@ -443,23 +522,32 @@ struct regnode_ssc { * then. Only set under /l; never in an SSC */ #define ANYOF_LOC_FOLD 0x04 +/* If set, ANYOF_LOC_FOLD is also set, and there are potential matches that + * will be valid only if the locale is a UTF-8 one. */ +#define ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES 0x08 + /* If set, means to warn if runtime locale isn't a UTF-8 one. Only under /l. - * If set, none of INVERT, LOC_FOLD, POSIXL, HAS_NONBITMAP_NON_UTF8_MATCHES can + * If set, none of INVERT, LOC_FOLD, POSIXL, + * ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP can * be set. Can be in an SSC */ -#define ANYOF_LOC_REQ_UTF8 0x08 +#define ANYOF_LOC_REQ_UTF8 0x10 /* If set, the node matches every code point NUM_ANYOF_CODE_POINTS and above. * Can be in an SSC */ -#define ANYOF_MATCHES_ALL_ABOVE_BITMAP 0x10 +#define ANYOF_MATCHES_ALL_ABOVE_BITMAP 0x20 -/* If set, the node can match something outside the bitmap that isn't in utf8; - * never set under /d nor in an SSC */ -#define ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES 0x20 - -/* Are there things outside the bitmap that will match only if the target - * string is encoded in UTF-8? (This is not set if ANYOF_ABOVE_BITMAP_ALL is - * set). Can be in SSC */ -#define ANYOF_HAS_UTF8_NONBITMAP_MATCHES 0x40 +/* Shared bit: + * Under /d it means the ANYOFD node matches more things if the target + * string is encoded in UTF-8; any such things will be non-ASCII, + * characters that are < 256, and can be accessed via the swash. + * When not under /d, it means the ANYOF node contains a user-defined + * property that wasn't yet defined at the time the regex was compiled, + * and so must be looked up at runtime, by creating a swash + * (These uses are mutually exclusive because a user-defined property is + * specified by \p{}, and \p{} implies /u which deselects /d). The long macro + * name is to make sure that you are cautioned about its shared nature. Only + * the non-/d meaning can be in an SSC */ +#define ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP 0x40 /* Shared bit: * Under /d it means the ANYOFD node matches all non-ASCII Latin1 @@ -479,8 +567,7 @@ struct regnode_ssc { /* These are the flags that apply to both regular ANYOF nodes and synthetic * start class nodes during construction of the SSC. During finalization of * the SSC, other of the flags may get added to it */ -#define ANYOF_COMMON_FLAGS ( ANYOF_HAS_UTF8_NONBITMAP_MATCHES \ - |ANYOF_LOC_REQ_UTF8) +#define ANYOF_COMMON_FLAGS ANYOF_LOC_REQ_UTF8 /* Character classes for node->classflags of ANYOF */ /* Should be synchronized with a table in regprop() */ diff --git a/regen/charset_translations.pl b/regen/charset_translations.pl index 9696560..b37c3cd 100644 --- a/regen/charset_translations.pl +++ b/regen/charset_translations.pl @@ -2,6 +2,10 @@ use strict; use warnings; +# WARNING: This must be kept in sync with the UTF8_MAXBYTES value in +# utfebcdic.h +$CHARSET_TRANSLATIONS::UTF_EBCDIC_MAXBYTES = 14; + # Utilities for various character set issues. Currently handles ASCII and # EBCDIC only. It is trivial to add support for new EBCDIC code pages (unless # they have identical variant character signatures as existing ones, and there @@ -234,12 +238,13 @@ sub get_I8_2_utf($) { sub _UTF_START_MASK($) { # Internal my $len = shift; - return ((($len) >= 6) ? 0x01 : (0x1F >> (($len)-2))); + return (($len >= 7) ? 0x00 : (0x1F >> ($len - 2))); } sub _UTF_START_MARK($) { # Internal - return (0xFF & (0xFE << (7-(shift)))); + my $len = shift; + return (($len > 7) ? 0xFF : (0xFF & (0xFE << (7- $len)))); } sub cp_2_utfbytes($$) { @@ -269,7 +274,9 @@ sub cp_2_utfbytes($$) { $ucp < 0x4000 ? 3 : $ucp < 0x40000 ? 4 : $ucp < 0x400000 ? 5 : - $ucp < 0x4000000 ? 6 : 7; + $ucp < 0x4000000 ? 6 : + $ucp < 0x40000000? 7 : + $CHARSET_TRANSLATIONS::UTF_EBCDIC_MAXBYTES; my @str; for (1 .. $len - 1) { diff --git a/regen/ebcdic.pl b/regen/ebcdic.pl index b726793..fa8a051 100644 --- a/regen/ebcdic.pl +++ b/regen/ebcdic.pl @@ -10,31 +10,34 @@ require 'regen/charset_translations.pl'; my $out_fh = open_new('ebcdic_tables.h', '>', {style => '*', by => $0, }); -sub output_table ($$) { +sub output_table ($$;$) { my $table_ref = shift; my $name = shift; # Tables in hex easier to debug, but don't fit into 80 columns - my $print_in_hex = 0; + my $print_in_hex = shift // 1; die "Requres 256 entries in table $name, got @$table_ref" if @$table_ref != 256; print $out_fh "EXTCONST U8 $name\[\] = {\n"; - print $out_fh "/* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F */\n" if $print_in_hex; + my $column_numbers= "/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/\n"; + print $out_fh $column_numbers if $print_in_hex; for my $i (0 .. 255) { if ($print_in_hex) { - printf $out_fh "/* %X_ */ ", $i / 16 if $i % 16 == 0; - printf $out_fh " 0x%02X", $table_ref->[$i]; + # No row headings, so will fit in 80 cols. + #printf $out_fh "/* %X_ */ ", $i / 16 if $i % 16 == 0; + printf $out_fh "0x%02X", $table_ref->[$i]; } else { printf $out_fh "%4d", $table_ref->[$i]; } - printf $out_fh " /* %X_ */", $i / 16 if $print_in_hex && $i % 16 == 15; print $out_fh ",", if $i < 255; + #print $out_fh ($i < 255) ? "," : " "; + #printf $out_fh " /* %X_ */", $i / 16 if $print_in_hex && $i % 16 == 15; print $out_fh "\n" if $i % 16 == 15; } - print $out_fh "/* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F */\n" if $print_in_hex; + print $out_fh $column_numbers if $print_in_hex; print $out_fh "};\n\n"; } @@ -99,7 +102,11 @@ END # order 1-bits (up to 7) for my $i (0xC0 .. 255) { my $count; - if (($i & 0b11111110) == 0b11111110) { + if ($i == 0b11111111) { + no warnings 'once'; + $count = $CHARSET_TRANSLATIONS::UTF_EBCDIC_MAXBYTES; + } + elsif (($i & 0b11111110) == 0b11111110) { $count= 7; } elsif (($i & 0b11111100) == 0b11111100) { @@ -129,7 +136,8 @@ END * entries marked 9 in tr16 are continuation bytes and are marked as length 1 * here so that we can recover. */ END - output_table(\@utf8skip, "PL_utf8skip"); + output_table(\@utf8skip, "PL_utf8skip", 0); # The 0 means don't print + # in hex } use feature 'unicode_strings'; diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl index acd1f91..baf25f1 100644 --- a/regen/unicode_constants.pl +++ b/regen/unicode_constants.pl @@ -158,6 +158,22 @@ printf $out_fh "\n/* The number of code points not matching \\pC */\n" . "#define NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C %d\n", 0x110000 - $count; +# If this release has both the CWCM and CWCF properties, find the highest code +# point which changes under any case change. We can use this to short-circuit +# code +my @cwcm = prop_invlist('CWCM'); +if (@cwcm) { + my @cwcf = prop_invlist('CWCF'); + if (@cwcf) { + my $max = ($cwcm[-1] < $cwcf[-1]) + ? $cwcf[-1] + : $cwcm[-1]; + printf $out_fh "\n/* The highest code point that has any type of case change */\n" + . "#define HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C 0x%X\n", + $max - 1; + } +} + print $out_fh "\n#endif /* H_UNICODE_CONSTANTS */\n"; read_only_bottom_close_and_rename($out_fh); diff --git a/regexec.c b/regexec.c index 85c31a6..afe87a5 100644 --- a/regexec.c +++ b/regexec.c @@ -654,7 +654,7 @@ Perl_re_intuit_start(pTHX_ "Intuit: trying to determine minimum start position...\n")); /* for now, assume that all substr offsets are positive. If at some point - * in the future someone wants to do clever things with look-behind and + * in the future someone wants to do clever things with lookbehind and * -ve offsets, they'll need to fix up any code in this function * which uses these offsets. See the thread beginning * <20140113145929.GF27210@iabyn.com> @@ -2683,7 +2683,7 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx, U32 n = 0; max = -1; /* calculate the right-most part of the string covered - * by a capture. Due to look-ahead, this may be to + * by a capture. Due to lookahead, this may be to * the right of $&, so we have to scan all captures */ while (n <= prog->lastparen) { if (prog->offs[n].end > max) @@ -2704,7 +2704,7 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx, U32 n = 0; min = max; /* calculate the left-most part of the string covered - * by a capture. Due to look-behind, this may be to + * by a capture. Due to lookbehind, this may be to * the left of $&, so we have to scan all captures */ while (min && n <= prog->lastparen) { if ( prog->offs[n].start != -1 @@ -2820,6 +2820,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, startpos = stringarg; + /* set these early as they may be used by the HOP macros below */ + reginfo->strbeg = strbeg; + reginfo->strend = strend; + reginfo->is_utf8_target = cBOOL(utf8_target); + if (prog->intflags & PREGf_GPOS_SEEN) { MAGIC *mg; @@ -2847,20 +2852,23 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, */ if (prog->intflags & PREGf_ANCH_GPOS) { - startpos = reginfo->ganch - prog->gofs; - if (startpos < - ((flags & REXEC_FAIL_ON_UNDERFLOW) ? stringarg : strbeg)) - { - DEBUG_r(PerlIO_printf(Perl_debug_log, - "fail: ganch-gofs before earliest possible start\n")); - return 0; + if (prog->gofs) { + startpos = HOPBACKc(reginfo->ganch, prog->gofs); + if (!startpos || + ((flags & REXEC_FAIL_ON_UNDERFLOW) && startpos < stringarg)) + { + DEBUG_r(PerlIO_printf(Perl_debug_log, + "fail: ganch-gofs before earliest possible start\n")); + return 0; + } } + else + startpos = reginfo->ganch; } else if (prog->gofs) { - if (startpos - prog->gofs < strbeg) + startpos = HOPBACKc(startpos, prog->gofs); + if (!startpos) startpos = strbeg; - else - startpos -= prog->gofs; } else if (prog->intflags & PREGf_GPOS_FLOAT) startpos = strbeg; @@ -2943,13 +2951,10 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, reginfo->prog = rx; /* Yes, sorry that this is confusing. */ reginfo->intuit = 0; - reginfo->is_utf8_target = cBOOL(utf8_target); reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx)); reginfo->warned = FALSE; - reginfo->strbeg = strbeg; reginfo->sv = sv; reginfo->poscache_maxiter = 0; /* not yet started a countdown */ - reginfo->strend = strend; /* see how far we have to get to not match where we matched before */ reginfo->till = stringarg + minend; @@ -3088,7 +3093,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, /* For anchored \G, the only position it can match from is * (ganch-gofs); we already set startpos to this above; if intuit * moved us on from there, we can't possibly succeed */ - assert(startpos == reginfo->ganch - prog->gofs); + assert(startpos == HOPBACKc(reginfo->ganch, prog->gofs)); if (s == startpos && regtry(reginfo, &s)) goto got_it; goto phooey; @@ -5805,8 +5810,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) } else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) { if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), - (U8) EIGHT_BIT_UTF8_TO_NATIVE(nextchr, - *(locinput + 1)))))) + EIGHT_BIT_UTF8_TO_NATIVE(nextchr, + *(locinput + 1)))))) { sayNO; } @@ -8727,11 +8732,27 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const { match = TRUE; /* Everything above the bitmap matches */ } - else if ((flags & ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES) - || (utf8_target && (flags & ANYOF_HAS_UTF8_NONBITMAP_MATCHES)) - || ((flags & ANYOF_LOC_FOLD) - && IN_UTF8_CTYPE_LOCALE - && ARG(n) != ANYOF_ONLY_HAS_BITMAP)) + /* Here doesn't match everything above the bitmap. If there is + * some information available beyond the bitmap, we may find a + * match in it. If so, this is most likely because the code point + * is outside the bitmap range. But rarely, it could be because of + * some other reason. If so, various flags are set to indicate + * this possibility. On ANYOFD nodes, there may be matches that + * happen only when the target string is UTF-8; or for other node + * types, because runtime lookup is needed, regardless of the + * UTF-8ness of the target string. Finally, under /il, there may + * be some matches only possible if the locale is a UTF-8 one. */ + else if ( ARG(n) != ANYOF_ONLY_HAS_BITMAP + && ( c >= NUM_ANYOF_CODE_POINTS + || ( (flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP) + && ( UNLIKELY(OP(n) != ANYOFD) + || (utf8_target && ! isASCII_uni(c) +# if NUM_ANYOF_CODE_POINTS > 256 + && c < 256 +# endif + ))) + || (( flags & ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES) + && IN_UTF8_CTYPE_LOCALE))) { SV* only_utf8_locale = NULL; SV * const sw = _get_regclass_nonbitmap_data(prog, n, TRUE, 0, diff --git a/scope.c b/scope.c index 037bbc0..c08eda0 100644 --- a/scope.c +++ b/scope.c @@ -131,8 +131,9 @@ Perl_markstack_grow(pTHX) Renew(PL_markstack, newmax, I32); PL_markstack_max = PL_markstack + newmax; PL_markstack_ptr = PL_markstack + oldmax; - DEBUG_s(PerlIO_printf(Perl_debug_log, "MARK grow %p %d by %d\n", - PL_markstack_ptr, *PL_markstack_ptr, oldmax)); + DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log, + "MARK grow %p %"IVdf" by %"IVdf"\n", + PL_markstack_ptr, (IV)*PL_markstack_ptr, (IV)oldmax))); return PL_markstack_ptr; } @@ -297,6 +298,19 @@ Perl_save_set_svflags(pTHX_ SV* sv, U32 mask, U32 val) SS_ADD_END(4); } +/* +=for apidoc save_gp + +Saves the current GP of gv on the save stack to be restored on scope exit. + +If empty is true, replace the GP with a new GP. + +If empty is false, mark gv with GVf_INTRO so the next reference +assigned is localized, which is how C< local *foo = $someref; > works. + +=cut +*/ + void Perl_save_gp(pTHX_ GV *gv, I32 empty) { @@ -840,9 +854,18 @@ Perl_leave_scope(pTHX_ I32 base) *svp = ARG0_SV; SvREFCNT_dec(sv); if (UNLIKELY(SvSMAGICAL(ARG0_SV))) { + /* mg_set could die, skipping the freeing of ARG0_SV and + * refsv; Ensure that they're always freed in that case */ + dSS_ADD; + SS_ADD_PTR(ARG0_SV); + SS_ADD_UV(SAVEt_FREESV); + SS_ADD_PTR(refsv); + SS_ADD_UV(SAVEt_FREESV); + SS_ADD_END(4); PL_localizing = 2; mg_set(ARG0_SV); PL_localizing = 0; + break; } SvREFCNT_dec_NN(ARG0_SV); SvREFCNT_dec(refsv); @@ -897,23 +920,25 @@ Perl_leave_scope(pTHX_ I32 base) case SAVEt_AV: /* array reference */ SvREFCNT_dec(GvAV(ARG1_GV)); GvAV(ARG1_GV) = ARG0_AV; + avhv_common: if (UNLIKELY(SvSMAGICAL(ARG0_SV))) { + /* mg_set might die, so make sure ARG1 isn't leaked */ + dSS_ADD; + SS_ADD_PTR(ARG1_SV); + SS_ADD_UV(SAVEt_FREESV); + SS_ADD_END(2); PL_localizing = 2; mg_set(ARG0_SV); PL_localizing = 0; + break; } SvREFCNT_dec_NN(ARG1_GV); break; case SAVEt_HV: /* hash reference */ SvREFCNT_dec(GvHV(ARG1_GV)); GvHV(ARG1_GV) = ARG0_HV; - if (UNLIKELY(SvSMAGICAL(ARG0_SV))) { - PL_localizing = 2; - mg_set(ARG0_SV); - PL_localizing = 0; - } - SvREFCNT_dec_NN(ARG1_GV); - break; + goto avhv_common; + case SAVEt_INT_SMALL: *(int*)ARG0_PTR = (int)(uv >> SAVE_TIGHT_SHIFT); break; diff --git a/sv.c b/sv.c index 8fad8be..f36a724 100644 --- a/sv.c +++ b/sv.c @@ -12332,6 +12332,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p U8* v = vhex; /* working pointer to vhex */ U8* vend; /* pointer to one beyond last digit of vhex */ U8* vfnz = NULL; /* first non-zero */ + U8* vlnz = NULL; /* last non-zero */ const bool lower = (c == 'a'); /* At output the values of vhex (up to vend) will * be mapped through the xdig to get the actual @@ -12339,6 +12340,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p const char* xdig = PL_hexdigit; int zerotail = 0; /* how many extra zeros to append */ int exponent = 0; /* exponent of the floating point input */ + bool hexradix = FALSE; /* should we output the radix */ /* XXX: denormals, NaN, Inf. * @@ -12363,7 +12365,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p # endif #endif - if (fv < 0) + if (fv < 0 + || Perl_signbit(nv) + ) *p++ = '-'; else if (plus) *p++ = plus; @@ -12385,8 +12389,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p } if (vfnz) { - U8* vlnz = NULL; /* The last non-zero. */ - /* Find the last non-zero xdigit. */ for (v = vend - 1; v >= vhex; v--) { if (*v) { @@ -12446,9 +12448,24 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p v = vhex; *p++ = xdig[*v++]; - /* The radix is always output after the first - * non-zero xdigit, or if alt. */ - if (vfnz < vlnz || alt) { + /* If there are non-zero xdigits, the radix + * is output after the first one. */ + if (vfnz < vlnz) { + hexradix = TRUE; + } + } + else { + *p++ = '0'; + exponent = 0; + zerotail = precis; + } + + /* The radix is always output if precis, or if alt. */ + if (precis > 0 || alt) { + hexradix = TRUE; + } + + if (hexradix) { #ifndef USE_LOCALE_NUMERIC *p++ = '.'; #else @@ -12464,17 +12481,17 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p } RESTORE_LC_NUMERIC(); #endif - } + } + if (vlnz) { while (v <= vlnz) *p++ = xdig[*v++]; - - while (zerotail--) - *p++ = '0'; } - else { + + if (zerotail > 0) { + while (zerotail--) { *p++ = '0'; - exponent = 0; + } } elen = p - PL_efloatbuf; diff --git a/sv.h b/sv.h index 313bfb8..33a0b7b 100644 --- a/sv.h +++ b/sv.h @@ -838,7 +838,7 @@ Set the current length of the string which is in the SV. See C> and C>. =for apidoc Am|void|SvLEN_set|SV* sv|STRLEN len -Set the actual length of the string which is in the SV. See C>. +Set the size of the string buffer for the SV. See C>. =cut */ diff --git a/t/lib/croak/pp_sys b/t/lib/croak/pp_sys new file mode 100644 index 0000000..739b7e9 --- /dev/null +++ b/t/lib/croak/pp_sys @@ -0,0 +1,16 @@ +__END__ +# pp_sys.c +# NAME pipe() croaks on bad left side [perl #126480] +# SKIP ? use Config; !$Config{d_pipe} && "No pipe() available" +my $fh; +pipe($$5, $fh) +EXPECT +Bad symbol for filehandle at - line 3. +######## +# NAME pipe() croaks on bad right side [perl #126480] +# SKIP ? use Config; !$Config{d_pipe} && "No pipe() available" +my $fh; +pipe($fh, $$5) +EXPECT +Bad symbol for filehandle at - line 2. +######## diff --git a/t/lib/cygwin.t b/t/lib/cygwin.t index a619e40..ba86170 100644 --- a/t/lib/cygwin.t +++ b/t/lib/cygwin.t @@ -52,18 +52,16 @@ is(Cygwin::mount_flags("/cygdrive") =~ /,cygdrive/, 1, "check cygdrive mount_fl # Cygdrive mount prefix my @flags = split(/,/, Cygwin::mount_flags('/cygdrive')); my $prefix = pop(@flags); -ok($prefix, "cygdrive mount prefix = " . (($prefix) ? $prefix : '')); -chomp(my $prefix2 = `df -a | grep -i '^c: ' | cut -d% -f2 | xargs`); -# we get something like "C: - - - - /cygdrive" if this isn't the entry -# df displays free space info for -$prefix2 =~ s/.* //; -$prefix2 =~ s/\/c$//i; -SKIP: -{ - $prefix2 - or skip("No C: entry found in df output", 1); - is($prefix, $prefix2, 'cygdrive mount prefix'); +ok($prefix, "cygdrive mount prefix = " . (($prefix) ? $prefix : '')); +my $prefix2 = readlink "/proc/cygdrive"; +unless ($prefix2) { + # fallback to old Cygwin, the drive need not actually exist, so + # this will always work (but might return the wrong prefix if the + # user re-mounted C:\ + chomp($prefix2 = `cygpath C:`); + $prefix2 = substr($prefix2, 0, -1-(length($prefix2)>2)); } +is($prefix, $prefix2, 'cygdrive mount prefix2 = ' . $prefix2); my @mnttbl = Cygwin::mount_table(); ok(@mnttbl > 0, "non empty mount_table"); diff --git a/t/lib/warnings/doop b/t/lib/warnings/doop index 74c3e90..bcc85a3 100644 --- a/t/lib/warnings/doop +++ b/t/lib/warnings/doop @@ -5,3 +5,33 @@ $_ = "\x80 \xff" ; chop ; EXPECT ######## +# NAME deprecation of logical bit operations with above ff code points +$_ = "\xFF" & "\x{100}"; # Above ff second +$_ = "\xFF" | "\x{101}"; +$_ = "\xFF" ^ "\x{102}"; +$_ = "\x{100}" & "\x{FF}"; # Above ff first +$_ = "\x{101}" | "\x{FF}"; +$_ = "\x{102}" ^ "\x{FF}"; +$_ = "\x{100}" & "\x{103}"; # both above ff has just one message raised +$_ = "\x{101}" | "\x{104}"; +$_ = "\x{102}" ^ "\x{105}"; +no warnings 'deprecated'; +$_ = "\xFF" & "\x{100}"; +$_ = "\xFF" | "\x{101}"; +$_ = "\xFF" ^ "\x{101}"; +$_ = "\x{100}" & "\x{FF}"; +$_ = "\x{101}" | "\x{FF}"; +$_ = "\x{102}" ^ "\x{FF}"; +$_ = "\x{100}" & "\x{103}"; +$_ = "\x{101}" | "\x{104}"; +$_ = "\x{102}" ^ "\x{105}"; +EXPECT +Use of strings with code points over 0xFF as arguments to bitwise and (&) operator is deprecated at - line 1. +Use of strings with code points over 0xFF as arguments to bitwise or (|) operator is deprecated at - line 2. +Use of strings with code points over 0xFF as arguments to bitwise xor (^) operator is deprecated at - line 3. +Use of strings with code points over 0xFF as arguments to bitwise and (&) operator is deprecated at - line 4. +Use of strings with code points over 0xFF as arguments to bitwise or (|) operator is deprecated at - line 5. +Use of strings with code points over 0xFF as arguments to bitwise xor (^) operator is deprecated at - line 6. +Use of strings with code points over 0xFF as arguments to bitwise and (&) operator is deprecated at - line 7. +Use of strings with code points over 0xFF as arguments to bitwise or (|) operator is deprecated at - line 8. +Use of strings with code points over 0xFF as arguments to bitwise xor (^) operator is deprecated at - line 9. diff --git a/t/lib/warnings/op b/t/lib/warnings/op index b253741..89de94f 100644 --- a/t/lib/warnings/op +++ b/t/lib/warnings/op @@ -824,6 +824,7 @@ use warnings 'parenthesis' ; my $a, $b = (1,2); my @foo,%bar, $quux; # there's a TAB here my $x, $y or print; +my $p, *q; no warnings 'parenthesis' ; my $c, $d = (1,2); EXPECT @@ -833,6 +834,7 @@ Parentheses missing around "my" list at - line 4. # op.c use warnings 'parenthesis' ; our $a, $b = (1,2); +our $p, *q; no warnings 'parenthesis' ; our $c, $d = (1,2); EXPECT @@ -842,11 +844,13 @@ Parentheses missing around "our" list at - line 3. use warnings 'parenthesis' ; local $a, $b = (1,2); local *f, *g; +local $p, *q; no warnings 'parenthesis' ; local $c, $d = (1,2); EXPECT Parentheses missing around "local" list at - line 3. Parentheses missing around "local" list at - line 4. +Parentheses missing around "local" list at - line 5. ######## # op.c use warnings 'bareword' ; diff --git a/t/lib/warnings/pp b/t/lib/warnings/pp index ab8f951..3324ccc 100644 --- a/t/lib/warnings/pp +++ b/t/lib/warnings/pp @@ -128,3 +128,11 @@ use utf8 ; $_ = "\x80 \xff" ; reverse ; EXPECT +######## +# NAME deprecation of complement with above ff code points +$_ = ~ "\xff"; +$_ = ~ "\x{100}"; +EXPECT +OPTION regex +Use of strings with code points over 0xFF as arguments to 1's complement \(~\) operator is deprecated at - line \d+. +Use of code point 0xFF+EFF is deprecated; the permissible max is 0x7F+ at - line \d+. diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8 index 809785b..e8c8527 100644 --- a/t/lib/warnings/utf8 +++ b/t/lib/warnings/utf8 @@ -88,18 +88,13 @@ Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5. ######## -BEGIN { - if (ord('A') == 193) { - print "SKIPPED\n# ebcdic platforms can't handle this large a code point"; - exit 0; - } -} use warnings 'utf8'; +no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines my $big_nonUnicode = uc(chr(0x8000_0000)); no warnings 'non_unicode'; my $big_nonUnicode = uc(chr(0x8000_0000)); EXPECT -Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 8. +Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 3. ######## use warnings 'utf8'; my $d7ff = lc pack("U", 0xD7FF); @@ -359,92 +354,149 @@ my $file = tempfile(); open(my $fh, "+>:utf8", $file); print $fh "\x{D7FF}", "\n"; print $fh "\x{D800}", "\n"; +print $fh "\x{D900}", "\n"; +print $fh "\x{DA00}", "\n"; +print $fh "\x{DB00}", "\n"; +print $fh "\x{DC00}", "\n"; +print $fh "\x{DD00}", "\n"; +print $fh "\x{DE00}", "\n"; +print $fh "\x{DF00}", "\n"; print $fh "\x{DFFF}", "\n"; print $fh "\x{E000}", "\n"; print $fh "\x{FDCF}", "\n"; print $fh "\x{FDD0}", "\n"; +print $fh "\x{FDD1}", "\n"; print $fh "\x{FDEF}", "\n"; print $fh "\x{FDF0}", "\n"; +print $fh "\x{FDFE}", "\n"; +print $fh "\x{FDFF}", "\n"; +print $fh "\x{FE00}", "\n"; print $fh "\x{FEFF}", "\n"; print $fh "\x{FFFD}", "\n"; print $fh "\x{FFFE}", "\n"; print $fh "\x{FFFF}", "\n"; print $fh "\x{10000}", "\n"; +print $fh "\x{1FFFD}", "\n"; print $fh "\x{1FFFE}", "\n"; print $fh "\x{1FFFF}", "\n"; +print $fh "\x{20000}", "\n"; +print $fh "\x{2FFFD}", "\n"; print $fh "\x{2FFFE}", "\n"; print $fh "\x{2FFFF}", "\n"; +print $fh "\x{30000}", "\n"; +print $fh "\x{3FFFD}", "\n"; print $fh "\x{3FFFE}", "\n"; print $fh "\x{3FFFF}", "\n"; +print $fh "\x{40000}", "\n"; +print $fh "\x{4FFFD}", "\n"; print $fh "\x{4FFFE}", "\n"; print $fh "\x{4FFFF}", "\n"; +print $fh "\x{50000}", "\n"; +print $fh "\x{5FFFD}", "\n"; print $fh "\x{5FFFE}", "\n"; print $fh "\x{5FFFF}", "\n"; +print $fh "\x{60000}", "\n"; +print $fh "\x{6FFFD}", "\n"; print $fh "\x{6FFFE}", "\n"; print $fh "\x{6FFFF}", "\n"; +print $fh "\x{70000}", "\n"; +print $fh "\x{7FFFD}", "\n"; print $fh "\x{7FFFE}", "\n"; print $fh "\x{7FFFF}", "\n"; +print $fh "\x{80000}", "\n"; +print $fh "\x{8FFFD}", "\n"; print $fh "\x{8FFFE}", "\n"; print $fh "\x{8FFFF}", "\n"; +print $fh "\x{90000}", "\n"; +print $fh "\x{9FFFD}", "\n"; print $fh "\x{9FFFE}", "\n"; print $fh "\x{9FFFF}", "\n"; +print $fh "\x{A0000}", "\n"; +print $fh "\x{AFFFD}", "\n"; print $fh "\x{AFFFE}", "\n"; print $fh "\x{AFFFF}", "\n"; +print $fh "\x{B0000}", "\n"; +print $fh "\x{BFFFD}", "\n"; print $fh "\x{BFFFE}", "\n"; print $fh "\x{BFFFF}", "\n"; +print $fh "\x{C0000}", "\n"; +print $fh "\x{CFFFD}", "\n"; print $fh "\x{CFFFE}", "\n"; print $fh "\x{CFFFF}", "\n"; +print $fh "\x{D0000}", "\n"; +print $fh "\x{DFFFD}", "\n"; print $fh "\x{DFFFE}", "\n"; print $fh "\x{DFFFF}", "\n"; +print $fh "\x{E0000}", "\n"; +print $fh "\x{EFFFD}", "\n"; print $fh "\x{EFFFE}", "\n"; print $fh "\x{EFFFF}", "\n"; +print $fh "\x{F0000}", "\n"; +print $fh "\x{FFFFD}", "\n"; print $fh "\x{FFFFE}", "\n"; print $fh "\x{FFFFF}", "\n"; print $fh "\x{100000}", "\n"; +print $fh "\x{10FFFD}", "\n"; print $fh "\x{10FFFE}", "\n"; print $fh "\x{10FFFF}", "\n"; print $fh "\x{110000}", "\n"; +print $fh "\x{11FFFD}", "\n"; +print $fh "\x{11FFFE}", "\n"; +print $fh "\x{11FFFF}", "\n"; +print $fh "\x{120000}", "\n"; close $fh; EXPECT Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. -Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7. -Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 10. -Unicode non-character U+FDEF is not recommended for open interchange in print at - line 11. -Unicode non-character U+FFFE is not recommended for open interchange in print at - line 15. -Unicode non-character U+FFFF is not recommended for open interchange in print at - line 16. -Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 18. -Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 19. -Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 20. -Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 21. -Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 22. -Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 23. -Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 24. -Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 25. -Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 26. -Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 27. -Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 28. -Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 29. -Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 30. -Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 31. -Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 32. -Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 33. -Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 34. -Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 35. -Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 36. -Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 37. -Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 38. -Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 39. -Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 40. -Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 41. -Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 42. -Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 43. -Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 44. -Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 45. -Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 46. -Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 47. -Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 49. -Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 50. -Code point 0x110000 is not Unicode, may not be portable in print at - line 51. +Unicode surrogate U+D900 is illegal in UTF-8 at - line 7. +Unicode surrogate U+DA00 is illegal in UTF-8 at - line 8. +Unicode surrogate U+DB00 is illegal in UTF-8 at - line 9. +Unicode surrogate U+DC00 is illegal in UTF-8 at - line 10. +Unicode surrogate U+DD00 is illegal in UTF-8 at - line 11. +Unicode surrogate U+DE00 is illegal in UTF-8 at - line 12. +Unicode surrogate U+DF00 is illegal in UTF-8 at - line 13. +Unicode surrogate U+DFFF is illegal in UTF-8 at - line 14. +Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 17. +Unicode non-character U+FDD1 is not recommended for open interchange in print at - line 18. +Unicode non-character U+FDEF is not recommended for open interchange in print at - line 19. +Unicode non-character U+FFFE is not recommended for open interchange in print at - line 26. +Unicode non-character U+FFFF is not recommended for open interchange in print at - line 27. +Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 30. +Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 31. +Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 34. +Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 35. +Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 38. +Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 39. +Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 42. +Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 43. +Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 46. +Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 47. +Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 50. +Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 51. +Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 54. +Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 55. +Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 58. +Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 59. +Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 62. +Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 63. +Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 66. +Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 67. +Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 70. +Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 71. +Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 74. +Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 75. +Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 78. +Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 79. +Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 82. +Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 83. +Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 86. +Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 87. +Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 90. +Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 91. +Code point 0x110000 is not Unicode, may not be portable in print at - line 92. +Code point 0x11FFFD is not Unicode, may not be portable in print at - line 93. +Code point 0x11FFFE is not Unicode, may not be portable in print at - line 94. +Code point 0x11FFFF is not Unicode, may not be portable in print at - line 95. +Code point 0x120000 is not Unicode, may not be portable in print at - line 96. ######## require "../test.pl"; use warnings 'utf8'; @@ -673,3 +725,35 @@ $a = fc("\x{102}"); $a = uc("\x{103}"); $a = ucfirst("\x{104}"); EXPECT +######## +# NAME Deprecation of too-large code points +require "../test.pl"; +use warnings 'non_unicode'; +my $max_cp = ~0 >> 1; +my $max_char = chr $max_cp; +my $to_warn_cp = $max_cp + 1; +my $to_warn_char = chr $to_warn_cp; +$max_char =~ /[\x{110000}\P{Unassigned}]/; +$to_warn_char =~ /[\x{110000}\P{Unassigned}]/; +my $temp = qr/$max_char/; +$temp = qr/$to_warn_char/; +$temp = uc($max_char); +$temp = uc($to_warn_char); +my $file = tempfile(); +open(my $fh, "+>:utf8", $file); +print $fh $max_char, "\n"; +print $fh $to_warn_char, "\n"; +close $fh; +EXPECT +OPTION regex +Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+. +Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in pattern match \(m//\) at - line \d+. +Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+. +Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+. +Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+. +Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+. +Operation "uc" returns its argument for non-Unicode code point 0x7F+ at - line \d+. +Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+. +Operation "uc" returns its argument for non-Unicode code point 0x80+ at - line \d+. +Code point 0x7F+ is not Unicode, may not be portable in print at - line \d+. +Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in print at - line \d+. diff --git a/t/loc_tools.pl b/t/loc_tools.pl index 86d8e48..bead7c7 100644 --- a/t/loc_tools.pl +++ b/t/loc_tools.pl @@ -12,7 +12,7 @@ # for this file, and are not to be used by outside callers. eval { require POSIX; import POSIX 'locale_h'; }; -my $has_posix_locales = defined &POSIX::LC_CTYPE; +$has_locale_h = ! $@; sub _trylocale ($$$$) { # For use only by other functions in this file! @@ -92,7 +92,6 @@ my $max_bad_category_number = -1000000; # 6 => 'CTYPE', # where 6 is the value of &POSIX::LC_CTYPE my %category_name; -eval { require POSIX; import POSIX 'locale_h'; }; unless ($@) { my $number_for_missing_category = $max_bad_category_number; foreach my $name (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { @@ -128,10 +127,11 @@ sub locales_enabled(;$) { # denoting a single category. # # If any of the individual categories specified by the optional parameter - # is all digits, it is taken to be the C enum for the category (e.g., - # &POSIX::LC_CTYPE). Otherwise it should be a string name of the - # category, like 'LC_TIME'. The initial 'LC_' is optional. It is a fatal - # error to call this with something that isn't a known category + # is all digits (and an optional leading minus), it is taken to be the C + # enum for the category (e.g., &POSIX::LC_CTYPE). Otherwise it should be + # a string name of the category, like 'LC_TIME'. The initial 'LC_' is + # optional. It is a fatal error to call this with something that isn't a + # known category use Config; @@ -139,7 +139,7 @@ sub locales_enabled(;$) { # I (khw) cargo-culted the '?' in the pattern on the # next line. && $Config{ccflags} !~ /\bD?NO_LOCALE\b/ - && $has_posix_locales; + && $has_locale_h; # Done with the global possibilities. Now check if any passed in category # is disabled. @@ -171,6 +171,9 @@ sub locales_enabled(;$) { return 0 if $number <= $max_bad_category_number || $Config{ccflags} =~ /\bD?NO_LOCALE_$name\b/; + + eval "defined &POSIX::LC_$name"; + return 0 if $@; } } @@ -203,10 +206,6 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the # UWIN seems to loop after taint tests, just skip for now return if ($^O =~ /^uwin/); - # Done this way in case this is 'required' in the caller before seeing if - # this is miniperl. - return unless $has_posix_locales; - _trylocale("C", $categories, \@Locale, $only_plays_well); _trylocale("POSIX", $categories, \@Locale, $only_plays_well); foreach (0..15) { @@ -254,8 +253,9 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the close(LOCALES); } elsif (($^O eq 'openbsd' || $^O eq 'bitrig' ) && -e '/usr/share/locale') { - # OpenBSD doesn't have a locale executable, so reading /usr/share/locale - # is much easier and faster than the last resort method. + # OpenBSD doesn't have a locale executable, so reading + # /usr/share/locale is much easier and faster than the last resort + # method. opendir(LOCALES, '/usr/share/locale'); while ($_ = readdir(LOCALES)) { @@ -326,8 +326,7 @@ sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input # On z/OS, even locales marked as UTF-8 aren't. return 0 if ord "A" != 65; - return 0 if ! $has_posix_locales; - return 0 if ! locales_enabled('LC_CTYPE'); + return 0 unless locales_enabled('LC_CTYPE'); my $locale = shift; @@ -373,10 +372,11 @@ sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl # list of locales to try; if omitted, this # tries all locales it can find on the # platform + return unless locales_enabled('LC_CTYPE'); + my $locales_ref = shift; if (! defined $locales_ref) { - return if ! $has_posix_locales; my @locales = find_locales(&POSIX::LC_CTYPE(), 1 # Reject iffy locales. diff --git a/t/op/attrs.t b/t/op/attrs.t index 16e1fce..219db03 100644 --- a/t/op/attrs.t +++ b/t/op/attrs.t @@ -428,4 +428,23 @@ package _123817 { eval 'return my $x : m'; } +# [perl #126257] +# attributed lex var as function arg caused assertion failure + +package P126257 { + sub MODIFY_SCALAR_ATTRIBUTES {} + sub MODIFY_ARRAY_ATTRIBUTES {} + sub MODIFY_HASH_ATTRIBUTES {} + sub MODIFY_CODE_ATTRIBUTES {} + sub foo {} + eval { foo(my $x : bar); }; + ::is $@, "", "RT 126257 scalar"; + eval { foo(my @x : bar); }; + ::is $@, "", "RT 126257 array"; + eval { foo(my %x : bar); }; + ::is $@, "", "RT 126257 hash"; + eval { foo(sub : bar {}); }; + ::is $@, "", "RT 126257 sub"; +} + done_testing(); diff --git a/t/op/bop.t b/t/op/bop.t index 409c91b..14e57ba 100644 --- a/t/op/bop.t +++ b/t/op/bop.t @@ -4,6 +4,9 @@ # test the bit operators '&', '|', '^', '~', '<<', and '>>' # +use warnings; +no warnings 'deprecated'; + BEGIN { chdir 't' if -d 't'; @INC = '../lib'; @@ -136,9 +139,7 @@ is (sprintf("%vd", v120.300 ^ v200.400), '176.188'); # UTF8 ~ behaviour # -SKIP: { - skip "Complements exceed maximum representable on EBCDIC ", 5 if $::IS_EBCDIC; - +{ my @not36; for (0x100...0xFFF) { @@ -621,6 +622,7 @@ is $^A, "123", '~v0 clears vstring magic on retval'; # not necessarily the ideal behavior, but that is what is happening. if ($w == 64) { no warnings "portable"; + no warnings "overflow"; # prevent compile-time warning for ivsize=4 is(-1 << 1, 0xFFFF_FFFF_FFFF_FFFE, "neg UV (sic) left shift = 0xFF..E"); is(-1 >> 1, 0x7FFF_FFFF_FFFF_FFFF, diff --git a/t/op/chop.t b/t/op/chop.t index bdeaf0d..d24b9e0 100644 --- a/t/op/chop.t +++ b/t/op/chop.t @@ -7,7 +7,6 @@ BEGIN { } my $tests_count = 148; -$tests_count -= 2 if $::IS_EBCDIC; plan tests => $tests_count; $_ = 'abc'; @@ -249,31 +248,25 @@ foreach my $start (@chars) { ok(1, "extend sp in pp_chomp"); } -SKIP: { +{ # [perl #73246] chop doesn't support utf8 # the problem was UTF8_IS_START() didn't handle perl's extended UTF8 - skip("Not representable in EBCDIC", 2) if $::IS_EBCDIC; - # We use hex constants instead of literal chars to avoid compilation - # errors in EBCDIC. - my $first_char = 0x80000001; - my $second_char = 0x80000000; - my $utf = chr($first_char) . chr($second_char); + no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines + my $utf = "\x{80000001}\x{80000000}"; my $result = chop($utf); - is($utf, chr $first_char, "chopping high 'unicode'- remnant"); - is($result, chr $second_char, "chopping high 'unicode' - result"); + is($utf, "\x{80000001}", "chopping high 'unicode'- remnant"); + is($result, "\x{80000000}", "chopping high 'unicode' - result"); SKIP: { no warnings 'overflow'; # avoid compile-time warnings below on 32-bit architectures use Config; $Config{ivsize} >= 8 or skip("this build can't handle very large characters", 2); - my $first_char = 0xffffffffffffffff; - my $second_char = 0xfffffffffffffffe; - my $utf = chr($first_char) . chr($second_char); + my $utf = "\x{ffffffffffffffff}\x{fffffffffffffffe}"; my $result = chop $utf; - is($utf, chr $first_char, "chop even higher 'unicode' - remnant"); - is($result, chr $second_char, "chop even higher 'unicode' - result"); + is($utf, "\x{ffffffffffffffff}", "chop even higher 'unicode' - remnant"); + is($result, "\x{fffffffffffffffe}", "chop even higher 'unicode' - result"); } } diff --git a/t/op/heredoc.t b/t/op/heredoc.t index a239e92..dadf105 100644 --- a/t/op/heredoc.t +++ b/t/op/heredoc.t @@ -7,7 +7,7 @@ BEGIN { } use strict; -plan(tests => 39); +plan(tests => 40); # heredoc without newline (#65838) @@ -89,4 +89,13 @@ HEREDOC {}, "long terminator fails correctly" ); + + # this would read freed memory + fresh_perl_like( + qq(0<<<<""0\n\n), + # valgrind and asan reports an error between these two lines + qr/^Number found where operator expected at - line 1, near "<<""0"\s+\(Missing operator/, + {}, + "don't use an invalid oldoldbufptr" + ); } diff --git a/t/op/index.t b/t/op/index.t index 243cc1b..d1e46dc 100644 --- a/t/op/index.t +++ b/t/op/index.t @@ -129,9 +129,8 @@ is(rindex($a, "foo", ), 0); is (rindex($text, $search_octets), -1); } -SKIP: { - skip "UTF-EBCDIC is limited to 0x7fffffff", 3 if $::IS_EBCDIC; - +{ + no warnings 'deprecated'; # These are above IV_MAX on 32 bit machines my $a = eval q{"\x{80000000}"}; my $s = $a.'defxyz'; is(index($s, 'def'), 1, "0x80000000 is a single character"); diff --git a/t/op/int.t b/t/op/int.t index 9aad020..dda4908 100644 --- a/t/op/int.t +++ b/t/op/int.t @@ -4,9 +4,10 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; require './test.pl'; + require Config; } -plan 15; +plan 17; # compile time evaluation @@ -71,3 +72,14 @@ cmp_ok($y, '==', 4745162525730, 'compile time division, result of about 42 bits' $y = 279964589018079; $y = int($y/59); cmp_ok($y, '==', 4745162525730, 'run time divison, result of about 42 bits'); + +SKIP: +{ # see #126635 + my $large; + $large = eval "0xffff_ffff" if $Config::Config{ivsize} == 4; + $large = eval "0xffff_ffff_ffff_ffff" if $Config::Config{ivsize} == 8; + $large or skip "Unusual ivsize", 1; + for my $x ($large, -1) { + cmp_ok($x, "==", int($x), "check $x == int($x)"); + } +} diff --git a/t/op/lc.t b/t/op/lc.t index ffea0ae..a390c63 100644 --- a/t/op/lc.t +++ b/t/op/lc.t @@ -323,11 +323,6 @@ my $utf8_locale = find_utf8_ctype_locale(); SKIP: { skip 'Can\'t find a UTF-8 locale', 4*256 unless defined $utf8_locale; - eval { require POSIX; import POSIX 'locale_h'; }; - unless (defined &POSIX::LC_CTYPE) { - skip "no POSIX (maybe no Fcntl, or no dynamic loading)", 4*256; - } - use feature qw( unicode_strings ); no locale; diff --git a/t/op/my.t b/t/op/my.t index 2dca46f..e76fc5e 100644 --- a/t/op/my.t +++ b/t/op/my.t @@ -149,5 +149,10 @@ is( $@, '', 'no errors while checking autovivification and persistence of hash r eval "my ()"; is( $@, '', "eval of my() passes"); +# RT #126844 +# This triggered a compile-time assert failure in rpeep() +eval 'my($a,$b),$x,my($c,$d)'; +pass("RT #126844"); + #Variable number of tests due to the way the while/for loops are tested now done_testing(); diff --git a/t/op/rand.t b/t/op/rand.t index b3bfdd4..b3df32e 100644 --- a/t/op/rand.t +++ b/t/op/rand.t @@ -24,184 +24,79 @@ use strict; use Config; require "./test.pl"; -plan(tests => 10); -my $reps = 15000; # How many times to try rand each time. +my $reps = 100_000; # How many times to try rand each time. # May be changed, but should be over 500. # The more the better! (But slower.) -sub bits ($) { - # Takes a small integer and returns the number of one-bits in it. - my $total; - my $bits = sprintf "%o", $_[0]; - while (length $bits) { - $total += (0,1,1,2,1,2,2,3)[chop $bits]; # Oct to bits - } - $total; -} +my $bits = 8; # how many significant bits we check on each random number +my $nslots = (1<< $bits); # how many different numbers + +plan(tests => 7 + $nslots); -# First, let's see whether randbits is set right +# First, let's see whether randbits is set right and that rand() returns +# an even distribution of values { - my($max, $min, $sum); # Characteristics of rand - my($off, $shouldbe); # Problems with randbits - my($dev, $bits); # Number of one bits - my $randbits = $Config{randbits}; - $max = $min = rand(1); + my $sum; + my @slots = (0) x $nslots; + my $prob = 1/$nslots; # probability of a particular slot being + # on a particular iteration + + # We are going to generate $reps random numbers, each in the range + # 0..$nslots-1. They should be evenly distributed. We use @slots to + # count the number of occurrences of each number. For each count, we + # check that it is in the range we expect. For example for reps = + # 100_000 and using 8 bits, we expect each count to be around + # 100_000/256 = 390. How much around it we tolerate depends on the + # standard deviation, and how many deviations we allow. If we allow + # 6-sigmas, then that means that in only 1 run in 506e6 will be get a + # failure by chance, assuming a fair random number generator. Given + # that we test each slot, the overall chance of a false negative in + # this test script is about 1 in 2e6, assuming 256 slots. + # + # the actual count in a slot should follow a binomial distribution + # (e.g. rolling 18 dice, we 'expect' to see 3 sixes, but there's + # actually a 24% chance of 3, a 20% change of 2 or 4, a 12% + # chance of 1 or 5, and a 4% chance of 0 or 6 of them). + # + # This makes it easy to calculate the expected mean a standard + # deviation; see + # https://en.wikipedia.org/wiki/Binomial_distribution#Variance + + my $mean = $reps * $prob; + my $stddev = sqrt($reps * $prob * (1 - $prob)); + my $sigma6 = $stddev * 6.0; # very unlikely to be outside that range + my $min = $mean - $sigma6; + my $max = $mean + $sigma6; + + note("reps=$reps; slots=$nslots; min=$min mean=$mean max=$max"); + for (1..$reps) { my $n = rand(1); if ($n < 0.0 or $n >= 1.0) { - print <= 1.0. -# Make sure \$Config{drand01} is a valid expression in the -# C-language, and produces values in the range [0.0,1.0). -# -# I give up. + diag(<= 1.0. +Make sure \$Config{drand01} is a valid expression in the +C-language, and produces values in the range [0.0,1.0). + +I give up. EOM exit; } - $sum += $n; - $bits += bits($n * 256); # Don't be greedy; 8 is enough - # It's too many if randbits is less than 8! - # But that should never be the case... I hope. - # Note: If you change this, you must adapt the - # formula for absolute standard deviation, below. - $max = $n if $n > $max; - $min = $n if $n < $min; - } - - - # This test checks for one of Perl's most frequent - # mis-configurations. Your system's documentation - # for rand(2) should tell you what value you need - # for randbits. Usually the diagnostic message - # has the right value as well. Just fix it and - # recompile, and you'll usually be fine. (The main - # reason that the diagnostic message might get the - # wrong value is that Config.pm is incorrect.) - # - unless (ok( !$max <= 0 or $max >= (2 ** $randbits))) {# Just in case... - print < 0); # Next more positive int - unless (is( $off, 0 )) { - $shouldbe = $Config{randbits} + $off; - print "# max=[$max] min=[$min]\n"; - print "# This perl was compiled with randbits=$randbits on $^O.\n"; - print "# Consider using randbits=$shouldbe instead.\n"; - # And skip the remaining tests; they would be pointless now. - print "# Skipping remaining tests until randbits is fixed.\n"; - exit; + $slots[int($n * $nslots)]++; } - - # This should always be true: 0 <= rand(1) < 1 - # If this test is failing, something is seriously wrong, - # either in perl or your system's rand function. - # - unless (ok( !($min < 0 or $max >= 1) )) { # Slightly redundant... - print "# min too low\n" if $min < 0; - print "# max too high\n" if $max >= 1; + for my $i (0..$nslots - 1) { + # this test should randomly fail very rarely. If it fails + # for you, try re-running this test script a few more times; + # if it goes away, it was likely a random (ha ha!) glitch. + # If you keep seeing failures, it means your random number + # generator is producing a very uneven spread of values. + ok($slots[$i] >= $min && $slots[$i] <= $max, "checking slot $i") + or diag("slot $i; count $slots[$i] outside expected range $min..$max"); } - - - # This is just a crude test. The average number produced - # by rand should be about one-half. But once in a while - # it will be relatively far away. Note: This test will - # occasionally fail on a perfectly good system! - # See the hints for test 4 to see why. - # - $sum /= $reps; - unless (ok( !($sum < 0.4 or $sum > 0.6) )) { - print "# Average random number is far from 0.5\n"; - } - - - # NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE - # This test will fail .006% of the time on a normal system. - # also - # This test asks you to see these hints 100% of the time! - # NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE - # - # There is probably no reason to be alarmed that - # something is wrong with your rand function. But, - # if you're curious or if you can't help being - # alarmed, keep reading. - # - # This is a less-crude test than test 3. But it has - # the same basic flaw: Unusually distributed random - # values should occasionally appear in every good - # random number sequence. (If you flip a fair coin - # twenty times every day, you'll see it land all - # heads about one time in a million days, on the - # average. That might alarm you if you saw it happen - # on the first day!) - # - # So, if this test failed on you once, run it a dozen - # times. If it keeps failing, it's likely that your - # rand is bogus. If it keeps passing, it's likely - # that the one failure was bogus. If it's a mix, - # read on to see about how to interpret the tests. - # - # The number printed in square brackets is the - # standard deviation, a statistical measure - # of how unusual rand's behavior seemed. It should - # fall in these ranges with these *approximate* - # probabilities: - # - # under 1 68.26% of the time - # 1-2 27.18% of the time - # 2-3 4.30% of the time - # over 3 0.26% of the time - # - # If the numbers you see are not scattered approximately - # (not exactly!) like that table, check with your vendor - # to find out what's wrong with your rand. Or with this - # algorithm. :-) - # - # Calculating absolute standard deviation for number of bits set - # (eight bits per rep) - $dev = abs ($bits - $reps * 4) / sqrt($reps * 2); - - ok( $dev < 4.0 ); - - if ($dev < 1.96) { - print "# Your rand seems fine. If this test failed\n"; - print "# previously, you may want to run it again.\n"; - } elsif ($dev < 2.575) { - print "# This is ok, but suspicious. But it will happen\n"; - print "# one time out of 25, more or less.\n"; - print "# You should run this test again to be sure.\n"; - } elsif ($dev < 3.3) { - print "# This is very suspicious. It will happen only\n"; - print "# about one time out of 100, more or less.\n"; - print "# You should run this test again to be sure.\n"; - } elsif ($dev < 3.9) { - print "# This is VERY suspicious. It will happen only\n"; - print "# about one time out of 1000, more or less.\n"; - print "# You should run this test again to be sure.\n"; - } else { - print "# This is VERY VERY suspicious.\n"; - print "# Your rand seems to be bogus.\n"; - } - print "#\n# If you are having random number troubles,\n"; - print "# see the hints within the test script for more\n"; - printf "# information on why this might fail. [ %.3f ]\n", $dev; } @@ -219,11 +114,9 @@ DIAG # within the range 0 - 100, and that the numbers produced # have a reasonably-large range among them. # - unless ( ok( !($min < 0 or $max >= 100 or ($max - $min) < 65) ) ) { - print "# min too low\n" if $min < 0; - print "# max too high\n" if $max >= 100; - print "# range too narrow\n" if ($max - $min) < 65; - } + cmp_ok($min, '>=', 0, "rand(100) >= 0"); + cmp_ok($max, '<', 100, "rand(100) < 100"); + cmp_ok($max - $min, '>=', 65, "rand(100) in 65 range"); # This test checks that rand without an argument @@ -239,7 +132,7 @@ DIAG # This checks that rand without an argument is not # rand($_). (In case somebody got overzealous.) # - ok($r < 1, 'rand() without args is under 1'); + cmp_ok($r, '<', 1, 'rand() without args is under 1'); } { # [perl #115928] use a standard rand() implementation diff --git a/t/op/sort.t b/t/op/sort.t index 3c76365..22d83a9 100644 --- a/t/op/sort.t +++ b/t/op/sort.t @@ -7,7 +7,7 @@ BEGIN { set_up_inc('../lib'); } use warnings; -plan(tests => 193); +plan(tests => 195); # these shouldn't hang { @@ -1127,3 +1127,14 @@ pass "no crash when sort block deletes *a and *b"; ::is (join('-', sort f2 3,1,2,4), '1-2-3-4', "Ret: f2"); ::is (join('-', sort f3 3,1,2,4), '1-2-3-4', "Ret: f3"); } + +{ + @a = sort{ *a=0; 1} 0..1; + pass "No crash when GP deleted out from under us [perl 124097]"; + + no warnings 'redefine'; + # some alternative non-solutions localized modifications to *a and *b + sub a { 0 }; + @a = sort { *a = sub { 1 }; $a <=> $b } 0 .. 1; + ok(a(), "*a wasn't localized inadvertantly"); +} diff --git a/t/op/sprintf2.t b/t/op/sprintf2.t index a898df1..2784bde 100644 --- a/t/op/sprintf2.t +++ b/t/op/sprintf2.t @@ -67,6 +67,10 @@ if ($Config{nvsize} == 8 && [ '% 20.10a', '3.14', ' 0x1.91eb851eb8p+1' ], [ '%020.10a', '3.14', '0x0001.91eb851eb8p+1' ], + [ '%.13a', '1', '0x1.0000000000000p+0' ], + [ '%.13a', '-1', '-0x1.0000000000000p+0' ], + [ '%.13a', '0', '0x0.0000000000000p+0' ], + [ '%30a', '3.14', ' 0x1.91eb851eb851fp+1' ], [ '%-30a', '3.14', '0x1.91eb851eb851fp+1 ' ], [ '%030a', '3.14', '0x00000000001.91eb851eb851fp+1' ], @@ -243,7 +247,7 @@ if ($Config{nvsize} == 8 && print "# no hexfloat tests\n"; } -plan tests => 1408 + ($Q ? 0 : 12) + @hexfloat + 6; +plan tests => 1408 + ($Q ? 0 : 12) + @hexfloat + 9; use strict; use Config; @@ -576,23 +580,27 @@ $o::count = 0; is $o::count, 0, 'sprintf %d string overload count is 0'; is $o::numcount, 1, 'sprintf %d number overload count is 1'; -my $ppc64_linux = $Config{archname} =~ /^ppc64-linux/; -my $irix_ld = $Config{archname} =~ /^IP\d+-irix-ld$/; +my $ppc_linux = $Config{archname} =~ /^(?:ppc|power(?:pc)?)(?:64)?-linux/; +my $irix_ld = $Config{archname} =~ /^IP\d+-irix-ld$/; for my $t (@hexfloat) { my ($format, $arg, $expected) = @$t; $arg = eval $arg; my $result = sprintf($format, $arg); my $ok = $result eq $expected; - if ($doubledouble && $ppc64_linux && $arg =~ /^2.71828/) { - # ppc64-linux has buggy exp(1). + # For certain platforms (all of which are currently double-double, + # but different implementations, GNU vs vendor, two different archs + # (ppc and mips), and two different libm interfaces) we have some + # bits-in-the-last-hexdigit differences. + # Patch them up as TODOs instead of deadly errors. + if ($doubledouble && $ppc_linux && $arg =~ /^2.71828/) { + # gets '0x1.5bf0a8b1457695355fb8ac404ecp+1' + # wants '0x1.5bf0a8b1457695355fb8ac404e8p+1' local $::TODO = "$Config{archname} exp(1)"; ok($ok, "'$format' '$arg' -> '$result' cf '$expected'"); next; } if ($doubledouble && $irix_ld && $arg =~ /^1.41421/) { - # irix has buggy sqrt(2), - # last hexdigit one bit error: # gets '0x1.6a09e667f3bcc908b2fb1366eacp+0' # wants '0x1.6a09e667f3bcc908b2fb1366ea8p+0' local $::TODO = "$Config{archname} sqrt(2)"; @@ -661,8 +669,17 @@ for my $t (@hexfloat) { # double-double long double %a special testing. SKIP: { - skip("$^O doublekind=$Config{doublekind}", 6) - unless ($Config{doublekind} == 4 && $^O eq 'linux'); + skip("uselongdouble=" . ($Config{uselongdouble} ? 'define' : 'undef') + . " longdblkind=$Config{longdblkind} os=$^O", 6) + unless ($Config{uselongdouble} && + ($Config{longdblkind} == 5 || + $Config{longdblkind} == 6) + # Gating on 'linux' (ppc) here is due to the differing + # double-double implementations: other (also big-endian) + # double-double platforms (e.g. AIX on ppc or IRIX on mips) + # do not behave similarly. + && $^O eq 'linux' + ); # [rt.perl.org 125633] like(sprintf("%La\n", (2**1020) + (2**-1072)), qr/^0x1.0{522}1p\+1020$/); @@ -677,3 +694,11 @@ SKIP: { like(sprintf("%La\n", (2**1023) + (2**-1074)), qr/^0x1.0{524}8p\+1023$/); } + +SKIP: { + skip("negative zero not available\n", 3) + unless sprintf('%+f', -0.0) =~ /^-0/; + is(sprintf("%a", -0.0), "-0x0p+0", "negative zero"); + is(sprintf("%+a", -0.0), "-0x0p+0", "negative zero"); + is(sprintf("%.13a", -0.0), "-0x0.0000000000000p+0", "negative zero"); +} diff --git a/t/op/substr.t b/t/op/substr.t index 71e9e89..eae2403 100644 --- a/t/op/substr.t +++ b/t/op/substr.t @@ -714,6 +714,7 @@ is($x, "\x{100}\x{200}\xFFb"); # [perl #23765] { my $a = pack("C", 0xbf); + no warnings 'deprecated'; substr($a, -1) &= chr(0xfeff); is($a, "\xbf"); } diff --git a/t/op/svleak.t b/t/op/svleak.t index 076f2bf..4c7a493 100644 --- a/t/op/svleak.t +++ b/t/op/svleak.t @@ -15,7 +15,7 @@ BEGIN { use Config; -plan tests => 129; +plan tests => 130; # run some code N times. If the number of SVs at the end of loop N is # greater than (N-1)*delta at the end of loop 1, we've got a leak @@ -493,3 +493,27 @@ $x = $mdr::a[0]{foo}{$mdr::k}{$mdr::i}; $x = $mdr::h[0]{foo}{$mdr::k}{$mdr::i}; $x = $mdr::r->[0]{foo}{$mdr::k}{$mdr::i}; EOF + +# un-localizing a tied (or generally magic) item could leak if the things +# called by mg_set() died + +{ + package MG_SET; + + sub TIESCALAR { bless [] } + sub FETCH { 1; } + my $do_die = 0; + sub STORE { die if $do_die; } + + sub f { + local $s; + tie $s, 'MG_SET'; + local $s; + $do_die = 1; + } + sub g { + eval { my $x = f(); }; + } + + ::leak(5,0, \&g, "MG_SET"); +} diff --git a/t/op/threads.t b/t/op/threads.t index e76c956..123ad27 100644 --- a/t/op/threads.t +++ b/t/op/threads.t @@ -139,7 +139,7 @@ watchdog(180, "process"); { local $SIG{__WARN__} = sub {}; # Ignore any thread creation failure warnings my @t; - for (1..100) { + for (1..10) { my $thr = threads->create( sub { require IO }); last if !defined($thr); # Probably ran out of memory push(@t, $thr); diff --git a/t/op/ver.t b/t/op/ver.t index 144a352..503efd7 100644 --- a/t/op/ver.t +++ b/t/op/ver.t @@ -223,10 +223,9 @@ $v = $revision + $version/1000 + $subversion/1000000; ok( abs($v - $]) < 10**-8 , "\$^V == \$] (numeric)" ); -SKIP: { - skip("In EBCDIC the v-string components cannot exceed 2147483647", 6) - if $::IS_EBCDIC; +{ + no warnings 'deprecated'; # These are above IV_MAX on 32 bit machines # [ID 20010902.001] check if v-strings handle full UV range or not if ( $Config{'uvsize'} >= 4 ) { is( sprintf("%vd", eval 'v2147483647.2147483648'), '2147483647.2147483648', 'v-string > IV_MAX[32-bit]' ); diff --git a/t/opbasic/qq.t b/t/opbasic/qq.t index 190ddb7..5d6908c 100644 --- a/t/opbasic/qq.t +++ b/t/opbasic/qq.t @@ -73,6 +73,7 @@ is ("a\o{1000}b", "a" . chr(0x200) . "b"); # This caused a memory fault no warnings "utf8"; +no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines is ("abc", eval qq[qq\x{8000_0000}abc\x{8000_0000}]); # Maybe \x{} should be an error, but if not it should certainly mean \x{0} diff --git a/t/porting/customized.dat b/t/porting/customized.dat index dd75426..60047a5 100644 --- a/t/porting/customized.dat +++ b/t/porting/customized.dat @@ -2,12 +2,13 @@ CPAN cpan/CPAN/lib/CPAN.pm ce62c43d72f101c011184dbbc59e21c2790826f0 Encode cpan/Encode/Encode.xs ef106510cceba35eaae4c52127116162f5d7260f Encode cpan/Encode/encoding.pm 51c19efc9bfe8467d6ae12a4654f6e7f980715bf Encode cpan/Encode/Unicode/Unicode.xs c7ab75e09f6b2685060d3c0bd091862fc2d31724 +ExtUtils::Constant cpan/ExtUtils-Constant/t/Constant.t a0369c919e216fb02767a637666bb4577ad79b02 ExtUtils::MakeMaker cpan/ExtUtils-MakeMaker/lib/ExtUtils/MakeMaker.pm fd048a43fc1a53acbe133bd96ddbf1421cfb28cf ExtUtils::MakeMaker cpan/ExtUtils-MakeMaker/lib/ExtUtils/MM_VMS.pm 0c78ba02d6249dfcca12ac9886a7c7cfb60e77fe ExtUtils::MakeMaker cpan/ExtUtils-MakeMaker/t/prereq.t 53bda2c549fd13a6b6c13a070ca6bc79883081c0 ExtUtils::MakeMaker cpan/ExtUtils-MakeMaker/t/vstrings.t 90035a2bdbf45f15b9c3196d072d7cba7e662871 Math::BigRat cpan/Math-BigRat/lib/Math/BigRat.pm 682352dde33638125ce12ca44990bd1cd44af4f8 -Pod::Perldoc cpan/Pod-Perldoc/lib/Pod/Perldoc.pm dcd53fba13060dbb71b1b5861fbc5c0881c8625a +Pod::Perldoc cpan/Pod-Perldoc/lib/Pod/Perldoc.pm fe0bc906fb74b69cfd3fb289316ba669d770d465 Scalar-List-Utils cpan/Scalar-List-Utils/lib/List/Util.pm 62d2a82a811b531a3fd25cb60c4c2ef943858892 Scalar-List-Utils cpan/Scalar-List-Utils/lib/List/Util/XS.pm 08abbe1a707927cee53e85ba85d6bd35c1c2ae50 Scalar-List-Utils cpan/Scalar-List-Utils/lib/Scalar/Util.pm 7f1e6eb11105623200ef9cdcb881545ccb769ded @@ -15,7 +16,6 @@ Scalar-List-Utils cpan/Scalar-List-Utils/lib/Sub/Util.pm d87811528ae3587f04e2f09 Scalar-List-Utils cpan/Scalar-List-Utils/ListUtil.xs ed25abc419771d6f3f12323f1f0a372f043d51b2 Socket cpan/Socket/Socket.pm bdc42a2bd5cb560ed1120a3e6f408ed7ece14dce Socket cpan/Socket/Socket.xs 6102315291684e56e360ff5e0dd237c9394c49b8 -Text::ParseWords cpan/Text-ParseWords/t/ParseWords.t 9bae51c9b944cd5c0bbabe9d397e573976a2be8e Win32API::File cpan/Win32API-File/buffers.h 02d230ac9ac7091365128161a0ed671898baefae Win32API::File cpan/Win32API-File/cFile.h fca7e383e76979c3ac3adf12d11d1bcd2618e489 Win32API::File cpan/Win32API-File/cFile.pc 992421eea7782a5957b64f66764f6ffb5093bee4 diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat index 18566eb..f874325 100644 --- a/t/porting/known_pod_issues.dat +++ b/t/porting/known_pod_issues.dat @@ -273,7 +273,7 @@ pod/perlcygwin.pod Verbatim line length including indents exceeds 79 by 20 pod/perldebguts.pod Verbatim line length including indents exceeds 79 by 27 pod/perldebtut.pod Verbatim line length including indents exceeds 79 by 3 pod/perldtrace.pod Verbatim line length including indents exceeds 79 by 7 -pod/perlgit.pod Verbatim line length including indents exceeds 79 by 12 +pod/perlgit.pod Verbatim line length including indents exceeds 79 by 1 pod/perlguts.pod ? Should you be using L<...> instead of 1 pod/perlguts.pod Verbatim line length including indents exceeds 79 by 1 pod/perlhack.pod ? Should you be using L<...> instead of 1 diff --git a/t/re/fold_grind.t b/t/re/fold_grind.t index 27f730d..fff68f3 100644 --- a/t/re/fold_grind.t +++ b/t/re/fold_grind.t @@ -425,7 +425,7 @@ sub pairs (@) { my $utf8_locale; my @charsets = qw(d u a aa); -if($Config{d_setlocale}) { +if (locales_enabled('LC_CTYPE')) { my $current_locale = POSIX::setlocale( &POSIX::LC_CTYPE, "C") // ""; if ($current_locale eq 'C') { use locale; diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index df8090c..1488a88 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -2372,15 +2372,14 @@ EOF sub Is_32_Bit_Super { return "110000\tFFFFFFFF\n" } sub Is_Portable_Super { return '!utf8::Any' } # Matches beyond 32 bits - SKIP: { # Assertion was failing on on 64-bit platforms; just didn't work on 32. - skip("EBCDIC only goes to 31 bits", 4) if $::IS_EBCDIC; no warnings qw(non_unicode portable); + no warnings 'deprecated'; # These are above IV_MAX use Config; # We use 'ok' instead of 'like' because the warnings are lexically # scoped, and want to turn them off, so have to do the match in this - # scope. (EBCDIC platforms can't handle above 2**32 - 1 + # scope. if ($Config{uvsize} < 8) { ok(chr(0xFFFF_FFFE) =~ /\p{Is_32_Bit_Super}/, "chr(0xFFFF_FFFE) can match a Unicode property"); @@ -2452,7 +2451,9 @@ EOF # (during compilation, so use a fresh perl) $Config{uvsize} == 8 or skip("need large code-points for this test", 1); - fresh_perl_is('/\x{E000000000}|/ and print qq(ok\n)', "ok\n", {}, + + # This is above IV_MAX on 32 bit machines, so turn off those warnings + fresh_perl_is('no warnings "deprecated"; /\x{E000000000}|/ and print qq(ok\n)', "ok\n", {}, "buffer overflow in TRIE_STORE_REVCHAR"); } diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index 62e3e4a..63b5e1b 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -219,7 +219,6 @@ my @death = '/(?[ \cK + ) ])/' => 'Unexpected \')\' {#} m/(?[ \cK + ){#} ])/', '/(?[ \cK + ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ \cK + {#}])/', '/(?[ ( ) ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ ( ) {#}])/', - '/(?[ ( ) ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ ( ) {#}])/', '/(?[[0]+()+])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[[0]+()+{#}])/', '/(?[ \p{foo} ])/' => 'Can\'t find Unicode property definition "foo" {#} m/(?[ \p{foo}{#} ])/', '/(?[ \p{ foo = bar } ])/' => 'Can\'t find Unicode property definition "foo = bar" {#} m/(?[ \p{ foo = bar }{#} ])/', diff --git a/t/re/speed.t b/t/re/speed.t index 4d6e2f0..28d4dbb 100644 --- a/t/re/speed.t +++ b/t/re/speed.t @@ -125,11 +125,15 @@ sub run_tests { for my $s ('', 's') { my $XBOL = $s ? 'SBOL' : 'MBOL'; my $text = "anchored($XBOL) implicit"; - fresh_perl_like(<<"PROG", qr/\b\Q$text\E\b/, {}, "/.${star}${greedy}X/${flags}${s} anchors implicitly"); +TODO: + { + local $main::TODO = 'regdump gets mangled by the VMS pipe implementation' if $^O eq 'VMS'; + fresh_perl_like(<<"PROG", qr/\b\Q$text\E\b/, {}, "/.${star}${greedy}X/${flags}${s} anchors implicitly"); BEGIN { \@INC = ('../lib', '.', '../ext/re'); } use re 'debug'; qr/.${star}${greedy}:::\\s*ab/${flags}${s} PROG + } } } } diff --git a/t/re/subst.t b/t/re/subst.t index 7939fe5..7826ecb 100644 --- a/t/re/subst.t +++ b/t/re/subst.t @@ -9,7 +9,7 @@ BEGIN { require './loc_tools.pl'; } -plan( tests => 268 ); +plan( tests => 269 ); $_ = 'david'; $a = s/david/rules/r; @@ -1083,3 +1083,15 @@ SKIP: { fresh_perl_is('s//*_=0;s|0||;00.y0/e; print qq(ok\n)', "ok\n", { stderr => 1 }, "[perl #126602] s//*_=0;s|0||/e crashes"); } + +{ + #RT 126260 gofs is in chars, not bytes + + # in something like /..\G/, the engine should start matching two + # chars before pos(). At one point it was matching two bytes before. + + my $s = "\x{121}\x{122}\x{123}"; + pos($s) = 2; + $s =~ s/..\G//g; + is($s, "\x{123}", "#RT 126260 gofs"); +} diff --git a/t/run/switchC.t b/t/run/switchC.t index 4f63c3b..6583010 100644 --- a/t/run/switchC.t +++ b/t/run/switchC.t @@ -11,7 +11,7 @@ BEGIN { skip_all_if_miniperl('-C and $ENV{PERL_UNICODE} are disabled on miniperl'); } -plan(tests => 14); +plan(tests => 15); my $r; @@ -111,3 +111,9 @@ SKIP: { like( $r, qr/^Too late for "-CS" option at -e line 1\.$/s, '#!perl -C but not command line' ); } + +$r = runperl ( switches => [ '-C00' ], + prog => '1', + stderr => 1, ); +like($r, qr/^Invalid number '00' for -C option\.$/s, + "perl -C00 [perl #123991]"); diff --git a/t/run/switches.t b/t/run/switches.t index 78915e0..aa9bda3 100644 --- a/t/run/switches.t +++ b/t/run/switches.t @@ -10,12 +10,11 @@ BEGIN { require Config; import Config; } -BEGIN { require "./test.pl"; } +BEGIN { require "./test.pl"; require "./loc_tools.pl"; } plan(tests => 115); use Config; -BEGIN { eval 'use POSIX qw(setlocale LC_ALL)' } # due to a bug in VMS's piping which makes it impossible for runperl() # to emulate echo -n (ie. stdin always winds up with a newline), these @@ -110,8 +109,7 @@ SWTEST } SKIP: { - skip "no POSIX on miniperl", 1, unless $INC{"POSIX.pm"}; - skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale}); + skip 'locales not available', 1 unless locales_enabled('LC_ALL'); my $tempdir = tempfile; mkdir $tempdir, 0700 or die "Can't mkdir '$tempdir': $!"; diff --git a/t/uni/case.pl b/t/uni/case.pl index d4bfc07..c3d5926 100644 --- a/t/uni/case.pl +++ b/t/uni/case.pl @@ -82,7 +82,7 @@ sub casetest { my $test = $already_run + 1; - for my $ord (sort keys %simple) { + for my $ord (sort { $a <=> $b } keys %simple) { my $char = pack "W", $ord; my $disp_input = unidump($char); @@ -96,7 +96,7 @@ sub casetest { } } - for my $ord (sort keys %spec) { + for my $ord (sort { $a <=> $b } keys %spec) { my $char = pack "W", $ord; my $disp_input = unidump($char); diff --git a/t/uni/fold.t b/t/uni/fold.t index 1a7e865..949ed97 100644 --- a/t/uni/fold.t +++ b/t/uni/fold.t @@ -431,12 +431,9 @@ foreach my $test_ref (@CF) { SKIP: { use feature qw( fc unicode_strings ); - eval { require POSIX; import POSIX 'locale_h'; }; - unless (defined &POSIX::LC_ALL) { - skip "no POSIX (or no Fcntl, or no dynamic loading)", 256; - } + skip "locales not available", 256 unless locales_enabled('LC_ALL'); - setlocale(&POSIX::LC_ALL, "C") if $Config{d_setlocale}; + setlocale(&POSIX::LC_ALL, "C"); # This tests both code paths in pp_fc diff --git a/t/uni/heavy.t b/t/uni/heavy.t new file mode 100644 index 0000000..c257dbc --- /dev/null +++ b/t/uni/heavy.t @@ -0,0 +1,40 @@ +#!./perl -w +# tests that utf8_heavy.pl doesn't use anything that prevents it loading +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +plan tests => 1; + +# see [perl #126593] +fresh_perl_is(<<'EOP', "", { stderr => 1 }, "doesn't break with \${^ENCODING}"); +no warnings qw(deprecated); +package Foo; +sub cat_decode { + # stolen from Encode.pm + my ( undef, undef, undef, $pos, $trm ) = @_; + my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ]; + use bytes; + if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) { + $$rdst .= + substr( $$rsrc, $pos, $npos - $pos + length($trm) ); + $$rpos = $npos + length($trm); + return 1; + } + $$rdst .= substr( $$rsrc, $pos ); + $$rpos = length($$rsrc); + return q(); +} + +sub decode { + my (undef, $tmp) = @_; + utf8::decode($tmp); + $tmp; +} + +BEGIN { ${^ENCODING} = bless [], q(Foo) }; + +(my $tmp = q(abc)) =~ tr/abc/123/; +EOP diff --git a/t/uni/overload.t b/t/uni/overload.t index ff89b08..eb8d32a 100644 --- a/t/uni/overload.t +++ b/t/uni/overload.t @@ -4,7 +4,9 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; require Config; import Config; - require './test.pl'; require './charset_tools.pl'; + require './test.pl'; + require './charset_tools.pl'; + require './loc_tools.pl'; } plan(tests => 215); @@ -95,14 +97,7 @@ $uc = ucfirst $u; is (length $uc, 1); is ($uc, $e_acute, "e acute -> E acute"); -my $have_setlocale = 0; -eval { - require POSIX; - if($Config{d_setlocale}) { - import POSIX ':locale_h'; - $have_setlocale++; - } -}; +my $have_setlocale = locales_enabled('LC_ALL'); SKIP: { if (!$have_setlocale) { diff --git a/thread.h b/thread.h index 1fb1cc6..9958a5b 100644 --- a/thread.h +++ b/thread.h @@ -208,10 +208,18 @@ } STMT_END # endif +# ifdef PERL_TSA_ACTIVE +# define perl_pthread_mutex_lock(m) perl_tsa_mutex_lock(m) +# define perl_pthread_mutex_unlock(m) perl_tsa_mutex_unlock(m) +# else +# define perl_pthread_mutex_lock(m) pthread_mutex_lock(m) +# define perl_pthread_mutex_unlock(m) pthread_mutex_unlock(m) +# endif + # define MUTEX_LOCK(m) \ STMT_START { \ int _eC_; \ - if ((_eC_ = pthread_mutex_lock((m)))) \ + if ((_eC_ = perl_pthread_mutex_lock((m)))) \ Perl_croak_nocontext("panic: MUTEX_LOCK (%d) [%s:%d]", \ _eC_, __FILE__, __LINE__); \ } STMT_END @@ -219,7 +227,7 @@ # define MUTEX_UNLOCK(m) \ STMT_START { \ int _eC_; \ - if ((_eC_ = pthread_mutex_unlock((m)))) \ + if ((_eC_ = perl_pthread_mutex_unlock((m)))) \ Perl_croak_nocontext("panic: MUTEX_UNLOCK (%d) [%s:%d]", \ _eC_, __FILE__, __LINE__); \ } STMT_END diff --git a/toke.c b/toke.c index 2c0a3c9..7f45f1b 100644 --- a/toke.c +++ b/toke.c @@ -3357,10 +3357,7 @@ S_scan_const(pTHX_ char *start) } NUM_ESCAPE_INSERT: - /* Insert oct or hex escaped character. There will always be - * enough room in sv since such escapes will be longer than any - * UTF-8 sequence they can end up as, except if they force us - * to recode the rest of the string into utf8 */ + /* Insert oct or hex escaped character. */ /* Here uv is the ordinal of the next character being added */ if (UVCHR_IS_INVARIANT(uv)) { @@ -3388,6 +3385,20 @@ S_scan_const(pTHX_ char *start) } if (has_utf8) { + /* Usually, there will already be enough room in 'sv' + * since such escapes are likely longer than any UTF-8 + * sequence they can end up as. This isn't the case on + * EBCDIC where \x{40000000} contains 12 bytes, and the + * UTF-8 for it contains 14. And, we have to allow for + * a trailing NUL. It probably can't happen on ASCII + * platforms, but be safe */ + const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv) + + 1; + if (UNLIKELY(needed > SvLEN(sv))) { + SvCUR_set(sv, d - SvPVX_const(sv)); + d = sv_grow(sv, needed) + SvCUR(sv); + } + d = (char*)uvchr_to_utf8((U8*)d, uv); if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) @@ -3582,7 +3593,7 @@ S_scan_const(pTHX_ char *start) /* The regex compiler is * expecting Unicode, not * native */ - (U8) NATIVE_TO_LATIN1(*str)); + NATIVE_TO_LATIN1(*str)); PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string)); Copy(hex_string, d, 3, char); @@ -9617,12 +9628,14 @@ S_scan_heredoc(pTHX_ char *s) else { SV *linestr_save; + char *oldbufptr_save; streaming: sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ term = PL_tokenbuf[1]; len--; linestr_save = PL_linestr; /* must restore this afterwards */ d = s; /* and this */ + oldbufptr_save = PL_oldbufptr; PL_linestr = newSVpvs(""); PL_bufend = SvPVX(PL_linestr); while (1) { @@ -9639,6 +9652,7 @@ S_scan_heredoc(pTHX_ char *s) restore PL_linestr. */ SvREFCNT_dec_NN(PL_linestr); PL_linestr = linestr_save; + PL_oldbufptr = oldbufptr_save; goto interminable; } CopLINE_set(PL_curcop, origline); @@ -9673,6 +9687,7 @@ S_scan_heredoc(pTHX_ char *s) PL_linestr = linestr_save; PL_linestart = SvPVX(linestr_save); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_oldbufptr = oldbufptr_save; s = d; break; } @@ -10505,6 +10520,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) hexfp_uquad |= b; hexfp_frac_bits += shift2; #else /* HEXFP_NV */ + PERL_UNUSED_VAR(shift2); hexfp_nv += b * mult; mult /= 16.0; #endif @@ -11377,10 +11393,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) "Integer overflow in decimal number"); } } -#ifdef EBCDIC - if (rev > 0x7FFFFFFF) - Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647"); -#endif + /* Append native character for the rev point */ tmpend = uvchr_to_utf8(tmpbuf, rev); sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); diff --git a/uconfig.h b/uconfig.h index a020e59..af65ca7 100644 --- a/uconfig.h +++ b/uconfig.h @@ -1932,8 +1932,10 @@ * LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN * LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN * LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN - * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN - * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE * LONG_DOUBLE_IS_UNKNOWN_FORMAT * It is only defined if the system supports long doubles. */ @@ -1947,9 +1949,14 @@ #define LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN 2 #define LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN 3 #define LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN 4 -#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN 5 -#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN 6 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE 5 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE 6 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE 7 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE 8 #define LONG_DOUBLE_IS_UNKNOWN_FORMAT -1 +/* Backward compat. */ +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE #endif /* HAS_LONG_LONG: @@ -5168,6 +5175,6 @@ #endif /* Generated from: - * 71bc7991dfb75b5d8a1082c2dd90d90d8f912ecfca90533df598d84f2c3d2382 config_h.SH + * 1fbdd1f584710d990cbc1b624770986e12ad6e3eac21c9f3851e6a0ad5a7fbce config_h.SH * 0ce9d24f6ed83c533882929bc7c0138fe345656c4b7070aad99bb103dbf3790a uconfig.sh * ex: set ro: */ diff --git a/unicode_constants.h b/unicode_constants.h index 71755de..1384873 100644 --- a/unicode_constants.h +++ b/unicode_constants.h @@ -182,6 +182,9 @@ /* The number of code points not matching \pC */ #define NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C 120522 +/* The highest code point that has any type of case change */ +#define HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C 0x118DF + #endif /* H_UNICODE_CONSTANTS */ /* ex: set ro: */ diff --git a/utf8.c b/utf8.c index 86e793b..45ee51e 100644 --- a/utf8.c +++ b/utf8.c @@ -35,6 +35,10 @@ static const char unees[] = "Malformed UTF-8 character (unexpected end of string)"; +static const char cp_above_legal_max[] = + "Use of code point 0x%"UVXf" is deprecated; the permissible max is 0x%"UVXf""; + +#define MAX_NON_DEPRECATED_CP (IV_MAX) /* =head1 Unicode Support @@ -99,144 +103,162 @@ For details, see the description for L. =cut */ +#define HANDLE_UNICODE_SURROGATE(uv, flags) \ + STMT_START { \ + if (flags & UNICODE_WARN_SURROGATE) { \ + Perl_ck_warner_d(aTHX_ packWARN(WARN_SURROGATE), \ + "UTF-16 surrogate U+%04"UVXf, uv); \ + } \ + if (flags & UNICODE_DISALLOW_SURROGATE) { \ + return NULL; \ + } \ + } STMT_END; + +#define HANDLE_UNICODE_NONCHAR(uv, flags) \ + STMT_START { \ + if (flags & UNICODE_WARN_NONCHAR) { \ + Perl_ck_warner_d(aTHX_ packWARN(WARN_NONCHAR), \ + "Unicode non-character U+%04"UVXf" is not " \ + "recommended for open interchange", uv); \ + } \ + if (flags & UNICODE_DISALLOW_NONCHAR) { \ + return NULL; \ + } \ + } STMT_END; + +/* Use shorter names internally in this file */ +#define SHIFT UTF_ACCUMULATION_SHIFT +#undef MARK +#define MARK UTF_CONTINUATION_MARK +#define MASK UTF_CONTINUATION_MASK + U8 * Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) { PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS; if (OFFUNI_IS_INVARIANT(uv)) { - *d++ = (U8) LATIN1_TO_NATIVE(uv); + *d++ = LATIN1_TO_NATIVE(uv); return d; } -#ifdef EBCDIC - /* Not representable in UTF-EBCDIC */ - flags |= UNICODE_DISALLOW_FE_FF; + if (uv <= MAX_UTF8_TWO_BYTE) { + *d++ = I8_TO_NATIVE_UTF8(( uv >> SHIFT) | UTF_START_MARK(2)); + *d++ = I8_TO_NATIVE_UTF8(( uv & MASK) | MARK); + return d; + } + + /* Not 2-byte; test for and handle 3-byte result. In the test immediately + * below, the 16 is for start bytes E0-EF (which are all the possible ones + * for 3 byte characters). The 2 is for 2 continuation bytes; these each + * contribute SHIFT bits. This yields 0x4000 on EBCDIC platforms, 0x1_0000 + * on ASCII; so 3 bytes covers the range 0x400-0x3FFF on EBCDIC; + * 0x800-0xFFFF on ASCII */ + if (uv < (16 * (1U << (2 * SHIFT)))) { + *d++ = I8_TO_NATIVE_UTF8(( uv >> ((3 - 1) * SHIFT)) | UTF_START_MARK(3)); + *d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) | MARK); + *d++ = I8_TO_NATIVE_UTF8(( uv /* (1 - 1) */ & MASK) | MARK); + +#ifndef EBCDIC /* These problematic code points are 4 bytes on EBCDIC, so + aren't tested here */ + /* The most likely code points in this range are below the surrogates. + * Do an extra test to quickly exclude those. */ + if (UNLIKELY(uv >= UNICODE_SURROGATE_FIRST)) { + if (UNLIKELY( UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv) + || UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv))) + { + HANDLE_UNICODE_NONCHAR(uv, flags); + } + else if (UNLIKELY(UNICODE_IS_SURROGATE(uv))) { + HANDLE_UNICODE_SURROGATE(uv, flags); + } + } #endif + return d; + } - /* The first problematic code point is the first surrogate */ - if ( flags /* It's common to turn off all these */ - && uv >= UNICODE_SURROGATE_FIRST - && ckWARN3_d(WARN_SURROGATE, WARN_NON_UNICODE, WARN_NONCHAR)) - { - if (UNICODE_IS_SURROGATE(uv)) { - if (flags & UNICODE_WARN_SURROGATE) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_SURROGATE), - "UTF-16 surrogate U+%04"UVXf, uv); - } - if (flags & UNICODE_DISALLOW_SURROGATE) { - return NULL; - } - } - else if (UNICODE_IS_SUPER(uv)) { - if (flags & UNICODE_WARN_SUPER - || (UNICODE_IS_FE_FF(uv) && (flags & UNICODE_WARN_FE_FF))) - { - Perl_ck_warner_d(aTHX_ packWARN(WARN_NON_UNICODE), - "Code point 0x%04"UVXf" is not Unicode, may not be portable", uv); - } - if (flags & UNICODE_DISALLOW_SUPER - || (UNICODE_IS_FE_FF(uv) && (flags & UNICODE_DISALLOW_FE_FF))) - { -#ifdef EBCDIC - Perl_die(aTHX_ "Can't represent character for Ox%"UVXf" on this platform", uv); - NOT_REACHED; /* NOTREACHED */ + /* Not 3-byte; that means the code point is at least 0x1_0000 on ASCII + * platforms, and 0x4000 on EBCDIC. There are problematic cases that can + * happen starting with 4-byte characters on ASCII platforms. We unify the + * code for these with EBCDIC, even though some of them require 5-bytes on + * those, because khw believes the code saving is worth the very slight + * performance hit on these high EBCDIC code points. */ + + if (UNLIKELY(UNICODE_IS_SUPER(uv))) { + if ( UNLIKELY(uv > MAX_NON_DEPRECATED_CP) + && ckWARN_d(WARN_DEPRECATED)) + { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + cp_above_legal_max, uv, MAX_NON_DEPRECATED_CP); + } + if ( (flags & UNICODE_WARN_SUPER) + || ( UNICODE_IS_ABOVE_31_BIT(uv) + && (flags & UNICODE_WARN_ABOVE_31_BIT))) + { + Perl_ck_warner_d(aTHX_ packWARN(WARN_NON_UNICODE), + + /* Choose the more dire applicable warning */ + (UNICODE_IS_ABOVE_31_BIT(uv)) + ? "Code point 0x%"UVXf" is not Unicode, and not portable" + : "Code point 0x%"UVXf" is not Unicode, may not be portable", + uv); + } + if (flags & UNICODE_DISALLOW_SUPER + || ( UNICODE_IS_ABOVE_31_BIT(uv) + && (flags & UNICODE_DISALLOW_ABOVE_31_BIT))) + { + return NULL; + } + } + else if (UNLIKELY(UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv))) { + HANDLE_UNICODE_NONCHAR(uv, flags); + } + + /* Test for and handle 4-byte result. In the test immediately below, the + * 8 is for start bytes F0-F7 (which are all the possible ones for 4 byte + * characters). The 3 is for 3 continuation bytes; these each contribute + * SHIFT bits. This yields 0x4_0000 on EBCDIC platforms, 0x20_0000 on + * ASCII, so 4 bytes covers the range 0x4000-0x3_FFFF on EBCDIC; + * 0x1_0000-0x1F_FFFF on ASCII */ + if (uv < (8 * (1U << (3 * SHIFT)))) { + *d++ = I8_TO_NATIVE_UTF8(( uv >> ((4 - 1) * SHIFT)) | UTF_START_MARK(4)); + *d++ = I8_TO_NATIVE_UTF8(((uv >> ((3 - 1) * SHIFT)) & MASK) | MARK); + *d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) | MARK); + *d++ = I8_TO_NATIVE_UTF8(( uv /* (1 - 1) */ & MASK) | MARK); + +#ifdef EBCDIC /* These were handled on ASCII platforms in the code for 3-byte + characters. The end-plane non-characters for EBCDIC were + handled just above */ + if (UNLIKELY(UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv))) { + HANDLE_UNICODE_NONCHAR(uv, flags); + } + else if (UNLIKELY(UNICODE_IS_SURROGATE(uv))) { + HANDLE_UNICODE_SURROGATE(uv, flags); + } #endif - return NULL; - } - } - else if (UNICODE_IS_NONCHAR(uv)) { - if (flags & UNICODE_WARN_NONCHAR) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_NONCHAR), - "Unicode non-character U+%04"UVXf" is not recommended for open interchange", - uv); - } - if (flags & UNICODE_DISALLOW_NONCHAR) { - return NULL; - } - } + + return d; } -#if defined(EBCDIC) + /* Not 4-byte; that means the code point is at least 0x20_0000 on ASCII + * platforms, and 0x4000 on EBCDIC. At this point we switch to a loop + * format. The unrolled version above turns out to not save all that much + * time, and at these high code points (well above the legal Unicode range + * on ASCII platforms, and well above anything in common use in EBCDIC), + * khw believes that less code outweighs slight performance gains. */ + { STRLEN len = OFFUNISKIP(uv); U8 *p = d+len-1; while (p > d) { - *p-- = (U8) I8_TO_NATIVE_UTF8((uv & UTF_CONTINUATION_MASK) | UTF_CONTINUATION_MARK); + *p-- = I8_TO_NATIVE_UTF8((uv & UTF_CONTINUATION_MASK) | UTF_CONTINUATION_MARK); uv >>= UTF_ACCUMULATION_SHIFT; } - *p = (U8) I8_TO_NATIVE_UTF8((uv & UTF_START_MASK(len)) | UTF_START_MARK(len)); + *p = I8_TO_NATIVE_UTF8((uv & UTF_START_MASK(len)) | UTF_START_MARK(len)); return d+len; } -#else /* Non loop style */ - if (uv < 0x800) { - *d++ = (U8)(( uv >> 6) | 0xc0); - *d++ = (U8)(( uv & 0x3f) | 0x80); - return d; - } - if (uv < 0x10000) { - *d++ = (U8)(( uv >> 12) | 0xe0); - *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80); - *d++ = (U8)(( uv & 0x3f) | 0x80); - return d; - } - if (uv < 0x200000) { - *d++ = (U8)(( uv >> 18) | 0xf0); - *d++ = (U8)(((uv >> 12) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80); - *d++ = (U8)(( uv & 0x3f) | 0x80); - return d; - } - if (uv < 0x4000000) { - *d++ = (U8)(( uv >> 24) | 0xf8); - *d++ = (U8)(((uv >> 18) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 12) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80); - *d++ = (U8)(( uv & 0x3f) | 0x80); - return d; - } - if (uv < 0x80000000) { - *d++ = (U8)(( uv >> 30) | 0xfc); - *d++ = (U8)(((uv >> 24) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 18) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 12) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80); - *d++ = (U8)(( uv & 0x3f) | 0x80); - return d; - } -#ifdef UTF8_QUAD_MAX - if (uv < UTF8_QUAD_MAX) -#endif - { - *d++ = 0xfe; /* Can't match U+FEFF! */ - *d++ = (U8)(((uv >> 30) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 24) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 18) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 12) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80); - *d++ = (U8)(( uv & 0x3f) | 0x80); - return d; - } -#ifdef UTF8_QUAD_MAX - { - *d++ = 0xff; /* Can't match U+FFFE! */ - *d++ = 0x80; /* 6 Reserved bits */ - *d++ = (U8)(((uv >> 60) & 0x0f) | 0x80); /* 2 Reserved bits */ - *d++ = (U8)(((uv >> 54) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 48) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 42) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 36) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 30) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 24) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 18) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 12) & 0x3f) | 0x80); - *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80); - *d++ = (U8)(( uv & 0x3f) | 0x80); - return d; - } -#endif -#endif /* Non loop style */ } + /* =for apidoc uvchr_to_utf8 @@ -251,8 +273,12 @@ is the recommended wide native character-aware way of saying *(d++) = uv; -This function accepts any UV as input. To forbid or warn on non-Unicode code -points, or those that may be problematic, see L. +This function accepts any UV as input, but very high code points (above +C on the platform) will raise a deprecation warning. This is +typically 0x7FFF_FFFF in a 32-bit word. + +It is possible to forbid or warn on non-Unicode code points, or those that may +be problematic by using L. =cut */ @@ -284,27 +310,54 @@ This is the Unicode-aware way of saying *(d++) = uv; -This function will convert to UTF-8 (and not warn) even code points that aren't -legal Unicode or are problematic, unless C contains one or more of the -following flags: +If C is 0, this function accepts any UV as input, but very high code +points (above C for the platform) will raise a deprecation warning. +This is typically 0x7FFF_FFFF in a 32-bit word. + +Specifying C can further restrict what is allowed and not warned on, as +follows: If C is a Unicode surrogate code point and C is set, -the function will raise a warning, provided UTF8 warnings are enabled. If instead -C is set, the function will fail and return NULL. -If both flags are set, the function will both warn and return NULL. - -The C and C flags -affect how the function handles a Unicode non-character. And likewise, the -C and C flags affect the handling of -code points that are -above the Unicode maximum of 0x10FFFF. Code points above 0x7FFF_FFFF (which are -even less portable) can be warned and/or disallowed even if other above-Unicode -code points are accepted, by the C and -C flags. - -And finally, the flag C selects all four of +the function will raise a warning, provided UTF8 warnings are enabled. If +instead C is set, the function will fail and return +NULL. If both flags are set, the function will both warn and return NULL. + +Similarly, the C and C flags +affect how the function handles a Unicode non-character. + +And likewise, the C and C flags +affect the handling of code points that are above the Unicode maximum of +0x10FFFF. Languages other than Perl may not be able to accept files that +contain these. + +The flag C selects all three of the above WARN flags; and C selects all -four DISALLOW flags. +three DISALLOW flags. + +Code points above 0x7FFF_FFFF (2**31 - 1) were never specified in any standard, +so using them is more problematic than other above-Unicode code points. Perl +invented an extension to UTF-8 to represent the ones above 2**36-1, so it is +likely that non-Perl languages will not be able to read files that contain +these that written by the perl interpreter; nor would Perl understand files +written by something that uses a different extension. For these reasons, there +is a separate set of flags that can warn and/or disallow these extremely high +code points, even if other above-Unicode ones are accepted. These are the +C and C flags. These +are entirely independent from the deprecation warning for code points above +C. On 32-bit machines, it will eventually be forbidden to have any +code point that needs more than 31 bits to represent. When that happens, +effectively the C flag will always be set on +32-bit machines. (Of course C will treat all +above-Unicode code points, including these, as malformations; and +C warns on these.) + +On EBCDIC platforms starting in Perl v5.24, the Perl extension for representing +extremely high code points kicks in at 0x3FFF_FFFF (2**30 -1), which is lower +than on ASCII. Prior to that, code points 2**31 and higher were simply +unrepresentable, and a different, incompatible method was used to represent +code points between 2**30 and 2**31 - 1. The flags C +and C have the same function as on ASCII +platforms, warning and disallowing 2**31 and higher. =cut */ @@ -459,21 +512,35 @@ a malformation and raise a warning, specify both the WARN and DISALLOW flags. (But note that warnings are not raised if lexically disabled nor if C is also specified.) -Very large code points (above 0x7FFF_FFFF) are considered more problematic than -the others that are above the Unicode legal maximum. There are several -reasons: they requre at least 32 bits to represent them on ASCII platforms, are -not representable at all on EBCDIC platforms, and the original UTF-8 -specification never went above this number (the current 0x10FFFF limit was -imposed later). (The smaller ones, those that fit into 32 bits, are -representable by a UV on ASCII platforms, but not by an IV, which means that -the number of operations that can be performed on them is quite restricted.) -The UTF-8 encoding on ASCII platforms for these large code points begins with a -byte containing 0xFE or 0xFF. The C flag will cause them to -be treated as malformations, while allowing smaller above-Unicode code points. -(Of course C will treat all above-Unicode code points, -including these, as malformations.) -Similarly, C acts just like -the other WARN flags, but applies just to these code points. +It is now deprecated to have very high code points (above C on the +platforms) and this function will raise a deprecation warning for these (unless +such warnings are turned off). This value, is typically 0x7FFF_FFFF (2**31 -1) +in a 32-bit word. + +Code points above 0x7FFF_FFFF (2**31 - 1) were never specified in any standard, +so using them is more problematic than other above-Unicode code points. Perl +invented an extension to UTF-8 to represent the ones above 2**36-1, so it is +likely that non-Perl languages will not be able to read files that contain +these that written by the perl interpreter; nor would Perl understand files +written by something that uses a different extension. For these reasons, there +is a separate set of flags that can warn and/or disallow these extremely high +code points, even if other above-Unicode ones are accepted. These are the +C and C flags. These +are entirely independent from the deprecation warning for code points above +C. On 32-bit machines, it will eventually be forbidden to have any +code point that needs more than 31 bits to represent. When that happens, +effectively the C flag will always be set on +32-bit machines. (Of course C will treat all +above-Unicode code points, including these, as malformations; and +C warns on these.) + +On EBCDIC platforms starting in Perl v5.24, the Perl extension for representing +extremely high code points kicks in at 0x3FFF_FFFF (2**30 -1), which is lower +than on ASCII. Prior to that, code points 2**31 and higher were simply +unrepresentable, and a different, incompatible method was used to represent +code points between 2**30 and 2**31 - 1. The flags C +and C have the same function as on ASCII +platforms, warning and disallowing 2**31 and higher. All other code points corresponding to Unicode characters, including private use and those yet to be assigned, are never considered malformed and never @@ -587,7 +654,6 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) for (s = s0 + 1; s < send; s++) { if (LIKELY(UTF8_IS_CONTINUATION(*s))) { -#ifndef EBCDIC /* Can't overflow in EBCDIC */ if (uv & UTF_ACCUMULATION_OVERFLOW_MASK) { /* The original implementors viewed this malformation as more @@ -599,7 +665,6 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) overflowed = TRUE; overflow_byte = *s; /* Save for warning message's use */ } -#endif uv = UTF8_ACCUMULATE(uv, *s); } else { @@ -666,12 +731,10 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) } } -#ifndef EBCDIC /* EBCDIC can't overflow */ if (UNLIKELY(overflowed)) { sv = sv_2mortal(Perl_newSVpvf(aTHX_ "%s (overflow at byte 0x%02x, after start byte 0x%02x)", malformed_text, overflow_byte, *s0)); goto malformed; } -#endif if (do_overlong_test && expectlen > (STRLEN) OFFUNISKIP(uv) @@ -690,8 +753,16 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) /* Here, the input is considered to be well-formed, but it still could be a * problematic code point that is not allowed by the input parameters. */ if (uv >= UNICODE_SURROGATE_FIRST /* isn't problematic if < this */ - && (flags & (UTF8_DISALLOW_ILLEGAL_INTERCHANGE - |UTF8_WARN_ILLEGAL_INTERCHANGE))) + && ((flags & ( UTF8_DISALLOW_NONCHAR + |UTF8_DISALLOW_SURROGATE + |UTF8_DISALLOW_SUPER + |UTF8_DISALLOW_ABOVE_31_BIT + |UTF8_WARN_NONCHAR + |UTF8_WARN_SURROGATE + |UTF8_WARN_SUPER + |UTF8_WARN_ABOVE_31_BIT)) + || ( UNLIKELY(uv > MAX_NON_DEPRECATED_CP) + && ckWARN_d(WARN_DEPRECATED)))) { if (UNICODE_IS_SURROGATE(uv)) { @@ -711,34 +782,70 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) if ((flags & (UTF8_WARN_SUPER|UTF8_CHECK_ONLY)) == UTF8_WARN_SUPER && ckWARN_d(WARN_NON_UNICODE)) { - sv = sv_2mortal(Perl_newSVpvf(aTHX_ "Code point 0x%04"UVXf" is not Unicode, may not be portable", uv)); + sv = sv_2mortal(Perl_newSVpvf(aTHX_ + "Code point 0x%04"UVXf" is not Unicode, may not be portable", + uv)); pack_warn = packWARN(WARN_NON_UNICODE); } -#ifndef EBCDIC /* Can never have the equivalent of FE nor FF on EBCDIC, since - not representable in UTF-EBCDIC */ - - /* The first byte being 0xFE or 0xFF is a subset of the SUPER code - * points. We test for these after the regular SUPER ones, and - * before possibly bailing out, so that the more dire warning - * overrides the regular one, if applicable */ - if ((*s0 & 0xFE) == 0xFE /* matches both FE, FF */ - && (flags & (UTF8_WARN_FE_FF|UTF8_DISALLOW_FE_FF))) + + /* The maximum code point ever specified by a standard was + * 2**31 - 1. Anything larger than that is a Perl extension that + * very well may not be understood by other applications (including + * earlier perl versions on EBCDIC platforms). On ASCII platforms, + * these code points are indicated by the first UTF-8 byte being + * 0xFE or 0xFF. We test for these after the regular SUPER ones, + * and before possibly bailing out, so that the slightly more dire + * warning will override the regular one. */ + if ( +#ifndef EBCDIC + (*s0 & 0xFE) == 0xFE /* matches both FE, FF */ +#else + /* The I8 for 2**31 (U+80000000) is + * \xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA2\xA0\xA0\xA0\xA0\xA0\xA0 + * and it turns out that on all EBCDIC pages recognized that + * the UTF-EBCDIC for that code point is + * \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41 + * For the next lower code point, the 1047 UTF-EBCDIC is + * \xFE\x41\x41\x41\x41\x41\x41\x42\x73\x73\x73\x73\x73\x73 + * The other code pages differ only in the bytes following + * \x42. Thus the following works (the minimum continuation + * byte is \x41). */ + *s0 == 0xFE && send - s0 > 7 && ( s0[1] > 0x41 + || s0[2] > 0x41 + || s0[3] > 0x41 + || s0[4] > 0x41 + || s0[5] > 0x41 + || s0[6] > 0x41 + || s0[7] > 0x42) +#endif + && (flags & (UTF8_WARN_ABOVE_31_BIT|UTF8_WARN_SUPER + |UTF8_DISALLOW_ABOVE_31_BIT))) { - if ((flags & (UTF8_WARN_FE_FF|UTF8_CHECK_ONLY)) - == UTF8_WARN_FE_FF - && ckWARN_d(WARN_UTF8)) + if ( ! (flags & UTF8_CHECK_ONLY) + && (flags & (UTF8_WARN_ABOVE_31_BIT|UTF8_WARN_SUPER)) + && ckWARN_d(WARN_UTF8)) { - sv = sv_2mortal(Perl_newSVpvf(aTHX_ "Code point 0x%"UVXf" is not Unicode, and not portable", uv)); + sv = sv_2mortal(Perl_newSVpvf(aTHX_ + "Code point 0x%"UVXf" is not Unicode, and not portable", + uv)); pack_warn = packWARN(WARN_UTF8); } - if (flags & UTF8_DISALLOW_FE_FF) { + if (flags & UTF8_DISALLOW_ABOVE_31_BIT) { goto disallowed; } } -#endif + if (flags & UTF8_DISALLOW_SUPER) { goto disallowed; } + + /* The deprecated warning overrides any non-deprecated one */ + if (UNLIKELY(uv > MAX_NON_DEPRECATED_CP) && ckWARN_d(WARN_DEPRECATED)) + { + sv = sv_2mortal(Perl_newSVpvf(aTHX_ cp_above_legal_max, + uv, MAX_NON_DEPRECATED_CP)); + pack_warn = packWARN(WARN_DEPRECATED); + } } else if (UNICODE_IS_NONCHAR(uv)) { if ((flags & (UTF8_WARN_NONCHAR|UTF8_CHECK_ONLY)) == UTF8_WARN_NONCHAR @@ -840,6 +947,9 @@ the next possible position in C that could begin a non-malformed character. See L for details on when the REPLACEMENT CHARACTER is returned. +Code points above the platform's C will raise a deprecation warning, +unless those are turned off. + =cut */ @@ -915,6 +1025,9 @@ is set (if C isn't NULL) so that (S + C<*retlen>>) is the next possible position in C that could begin a non-malformed character. See L for details on when the REPLACEMENT CHARACTER is returned. +Code points above the platform's C will raise a deprecation warning, +unless those are turned off. + =cut */ @@ -1445,14 +1558,14 @@ Perl__to_upper_title_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const char S_ * LENP will be set to the length in bytes of the string of changed characters * * The functions return the ordinal of the first character in the string of OUTP */ -#define CALL_UPPER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_toupper, "ToUc", "") -#define CALL_TITLE_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_totitle, "ToTc", "") -#define CALL_LOWER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tolower, "ToLc", "") +#define CALL_UPPER_CASE(uv, s, d, lenp) _to_utf8_case(uv, s, d, lenp, &PL_utf8_toupper, "ToUc", "") +#define CALL_TITLE_CASE(uv, s, d, lenp) _to_utf8_case(uv, s, d, lenp, &PL_utf8_totitle, "ToTc", "") +#define CALL_LOWER_CASE(uv, s, d, lenp) _to_utf8_case(uv, s, d, lenp, &PL_utf8_tolower, "ToLc", "") -/* This additionally has the input parameter SPECIALS, which if non-zero will - * cause this to use the SPECIALS hash for folding (meaning get full case +/* This additionally has the input parameter 'specials', which if non-zero will + * cause this to use the specials hash for folding (meaning get full case * folding); otherwise, when zero, this implies a simple case fold */ -#define CALL_FOLD_CASE(INP, OUTP, LENP, SPECIALS) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tofold, "ToCf", (SPECIALS) ? "" : NULL) +#define CALL_FOLD_CASE(uv, s, d, lenp, specials) _to_utf8_case(uv, s, d, lenp, &PL_utf8_tofold, "ToCf", (specials) ? "" : NULL) UV Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp) @@ -1472,7 +1585,7 @@ Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp) } uvchr_to_utf8(p, c); - return CALL_UPPER_CASE(p, p, lenp); + return CALL_UPPER_CASE(c, p, p, lenp); } UV @@ -1485,7 +1598,7 @@ Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp) } uvchr_to_utf8(p, c); - return CALL_TITLE_CASE(p, p, lenp); + return CALL_TITLE_CASE(c, p, p, lenp); } STATIC U8 @@ -1523,7 +1636,7 @@ Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp) } uvchr_to_utf8(p, c); - return CALL_LOWER_CASE(p, p, lenp); + return CALL_LOWER_CASE(c, p, p, lenp); } UV @@ -1543,14 +1656,15 @@ Perl__to_fold_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const unsigned int f assert (! (flags & FOLD_FLAGS_LOCALE)); - if (c == MICRO_SIGN) { + if (UNLIKELY(c == MICRO_SIGN)) { converted = GREEK_SMALL_LETTER_MU; } #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \ || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \ || UNICODE_DOT_DOT_VERSION > 0) - else if ((flags & FOLD_FLAGS_FULL) && c == LATIN_SMALL_LETTER_SHARP_S) { - + else if ( (flags & FOLD_FLAGS_FULL) + && UNLIKELY(c == LATIN_SMALL_LETTER_SHARP_S)) + { /* If can't cross 127/128 boundary, can't return "ss"; instead return * two U+017F characters, as fc("\df") should eq fc("\x{17f}\x{17f}") * under those circumstances. */ @@ -1619,7 +1733,7 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags) /* Here, above 255. If no special needs, just use the macro */ if ( ! (flags & (FOLD_FLAGS_LOCALE|FOLD_FLAGS_NOMIX_ASCII))) { uvchr_to_utf8(p, c); - return CALL_FOLD_CASE(p, p, lenp, flags & FOLD_FLAGS_FULL); + return CALL_FOLD_CASE(c, p, p, lenp, flags & FOLD_FLAGS_FULL); } else { /* Otherwise, _to_utf8_fold_flags has the intelligence to deal with the special flags. */ @@ -1756,6 +1870,11 @@ Perl__is_utf8_mark(pTHX_ const U8 *p) /* =for apidoc to_utf8_case +Instead use the appropriate one of L, +L, +L, +or L. + C

contains the pointer to the UTF-8 string encoding the character that is being converted. This routine assumes that the character at C

is well-formed. @@ -1778,37 +1897,123 @@ mappings, like C<"utf8::ToSpecLower">. C is a string like C<"ToLower"> which means the swash C<%utf8::ToLower>. +Code points above the platform's C will raise a deprecation warning, +unless those are turned off. + =cut */ UV Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, const char *normal, const char *special) { + PERL_ARGS_ASSERT_TO_UTF8_CASE; + + return _to_utf8_case(valid_utf8_to_uvchr(p, NULL), p, ustrp, lenp, swashp, normal, special); +} + + /* change namve uv1 to 'from' */ +UV +S__to_utf8_case(pTHX_ const UV uv1, const U8 *p, U8* ustrp, STRLEN *lenp, + SV **swashp, const char *normal, const char *special) +{ STRLEN len = 0; - const UV uv1 = valid_utf8_to_uvchr(p, NULL); - PERL_ARGS_ASSERT_TO_UTF8_CASE; + PERL_ARGS_ASSERT__TO_UTF8_CASE; + + /* For code points that don't change case, we already know that the output + * of this function is the unchanged input, so we can skip doing look-ups + * for them. Unfortunately the case-changing code points are scattered + * around. But there are some long consecutive ranges where there are no + * case changing code points. By adding tests, we can eliminate the lookup + * for all the ones in such ranges. This is currently done here only for + * just a few cases where the scripts are in common use in modern commerce + * (and scripts adjacent to those which can be included without additional + * tests). */ + + if (uv1 >= 0x0590) { + /* This keeps from needing further processing the code points most + * likely to be used in the following non-cased scripts: Hebrew, + * Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic, Devanagari, + * Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada, + * Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar */ + if (uv1 < 0x10A0) { + goto cases_to_self; + } - /* Note that swash_fetch() doesn't output warnings for these because it - * assumes we will */ - if (uv1 >= UNICODE_SURROGATE_FIRST) { - if (uv1 <= UNICODE_SURROGATE_LAST) { - if (ckWARN_d(WARN_SURROGATE)) { - const char* desc = (PL_op) ? OP_DESC(PL_op) : normal; - Perl_warner(aTHX_ packWARN(WARN_SURROGATE), - "Operation \"%s\" returns its argument for UTF-16 surrogate U+%04"UVXf"", desc, uv1); - } - } - else if (UNICODE_IS_SUPER(uv1)) { - if (ckWARN_d(WARN_NON_UNICODE)) { - const char* desc = (PL_op) ? OP_DESC(PL_op) : normal; - Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE), - "Operation \"%s\" returns its argument for non-Unicode code point 0x%04"UVXf"", desc, uv1); - } - } + /* The following largish code point ranges also don't have case + * changes, but khw didn't think they warranted extra tests to speed + * them up (which would slightly slow down everything else above them): + * 1100..139F Hangul Jamo, Ethiopic + * 1400..1CFF Unified Canadian Aboriginal Syllabics, Ogham, Runic, + * Tagalog, Hanunoo, Buhid, Tagbanwa, Khmer, Mongolian, + * Limbu, Tai Le, New Tai Lue, Buginese, Tai Tham, + * Combining Diacritical Marks Extended, Balinese, + * Sundanese, Batak, Lepcha, Ol Chiki + * 2000..206F General Punctuation + */ + + if (uv1 >= 0x2D30) { + + /* This keeps the from needing further processing the code points + * most likely to be used in the following non-cased major scripts: + * CJK, Katakana, Hiragana, plus some less-likely scripts. + * + * (0x2D30 above might have to be changed to 2F00 in the unlikely + * event that Unicode eventually allocates the unused block as of + * v8.0 2FE0..2FEF to code points that are cased. khw has verified + * that the test suite will start having failures to alert you + * should that happen) */ + if (uv1 < 0xA640) { + goto cases_to_self; + } + + if (uv1 >= 0xAC00) { + if (UNLIKELY(UNICODE_IS_SURROGATE(uv1))) { + if (ckWARN_d(WARN_SURROGATE)) { + const char* desc = (PL_op) ? OP_DESC(PL_op) : normal; + Perl_warner(aTHX_ packWARN(WARN_SURROGATE), + "Operation \"%s\" returns its argument for UTF-16 surrogate U+%04"UVXf"", desc, uv1); + } + goto cases_to_self; + } + + /* AC00..FAFF Catches Hangul syllables and private use, plus + * some others */ + if (uv1 < 0xFB00) { + goto cases_to_self; + + } + + if (UNLIKELY(UNICODE_IS_SUPER(uv1))) { + if ( UNLIKELY(uv1 > MAX_NON_DEPRECATED_CP) + && ckWARN_d(WARN_DEPRECATED)) + { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + cp_above_legal_max, uv1, MAX_NON_DEPRECATED_CP); + } + if (ckWARN_d(WARN_NON_UNICODE)) { + const char* desc = (PL_op) ? OP_DESC(PL_op) : normal; + Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE), + "Operation \"%s\" returns its argument for non-Unicode code point 0x%04"UVXf"", desc, uv1); + } + goto cases_to_self; + } +#ifdef HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C + if (UNLIKELY(uv1 + > HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C)) + { + + /* As of this writing, this means we avoid swash creation + * for anything beyond low Plane 1 */ + goto cases_to_self; + } +#endif + } + } /* Note that non-characters are perfectly legal, so no warning should - * be given */ + * be given. There are so few of them, that it isn't worth the extra + * tests to avoid swash creation */ } if (!*swashp) /* load on-demand */ @@ -1866,6 +2071,7 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, /* Here, there was no mapping defined, which means that the code point maps * to itself. Return the inputs */ + cases_to_self: len = UTF8SKIP(p); if (p != ustrp) { /* Don't copy onto itself */ Copy(p, ustrp, len, U8); @@ -1982,7 +2188,7 @@ Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags } } else { /* UTF-8, ord above 255 */ - result = CALL_UPPER_CASE(p, ustrp, lenp); + result = CALL_UPPER_CASE(valid_utf8_to_uvchr(p, NULL), p, ustrp, lenp); if (flags) { result = check_locale_boundary_crossing(p, result, ustrp, lenp); @@ -2053,7 +2259,7 @@ Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags } } else { /* UTF-8, ord above 255 */ - result = CALL_TITLE_CASE(p, ustrp, lenp); + result = CALL_TITLE_CASE(valid_utf8_to_uvchr(p, NULL), p, ustrp, lenp); if (flags) { result = check_locale_boundary_crossing(p, result, ustrp, lenp); @@ -2123,7 +2329,7 @@ Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags } } else { /* UTF-8, ord above 255 */ - result = CALL_LOWER_CASE(p, ustrp, lenp); + result = CALL_LOWER_CASE(valid_utf8_to_uvchr(p, NULL), p, ustrp, lenp); if (flags) { result = check_locale_boundary_crossing(p, result, ustrp, lenp); @@ -2206,7 +2412,7 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags) } } else { /* UTF-8, ord above 255 */ - result = CALL_FOLD_CASE(p, ustrp, lenp, flags & FOLD_FLAGS_FULL); + result = CALL_FOLD_CASE(valid_utf8_to_uvchr(p, NULL), p, ustrp, lenp, flags & FOLD_FLAGS_FULL); if (flags & FOLD_FLAGS_LOCALE) { @@ -2650,7 +2856,7 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m * (see lib/unicore/SpecCase.txt) The SWASHGET in lib/utf8_heavy.pl is * the lower-level routine, and it is similarly broken for returning * multiple values. --jhi - * For those, you should use to_utf8_case() instead */ + * For those, you should use S__to_utf8_case() instead */ /* Now SWASHGET is recasted into S_swatch_get in this file. */ /* Note: @@ -3323,7 +3529,7 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash) * Code could be written to automatically figure this out, similar to the * code that does this for multi-character folds, but this is the only case * where something like this is ever likely to happen, as all the single - * char folds to The 0-255 range are now quite settled. Instead there is a + * char folds to the 0-255 range are now quite settled. Instead there is a * little special code that is compiled only for this Unicode version. This * is smaller and didn't require much coding time to do. But this makes * this routine strongly tied to being used just for CaseFolding. If ever @@ -3829,7 +4035,10 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len) /* May change: warns if surrogates, non-character code points, or * non-Unicode code points are in s which has length len bytes. Returns * TRUE if none found; FALSE otherwise. The only other validity check is - * to make sure that this won't exceed the string's length */ + * to make sure that this won't exceed the string's length. + * + * Code points above the platform's C will raise a deprecation + * warning, unless those are turned off. */ const U8* const e = s + len; bool ok = TRUE; @@ -3845,11 +4054,33 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len) if (UNLIKELY(isUTF8_POSSIBLY_PROBLEMATIC(*s))) { STRLEN char_len; if (UTF8_IS_SUPER(s, e)) { - if (ckWARN_d(WARN_NON_UNICODE)) { + if ( ckWARN_d(WARN_NON_UNICODE) + || ( ckWARN_d(WARN_DEPRECATED) +#if defined(UV_IS_QUAD) + /* 2**63 and up meet these conditions provided we have + * a 64-bit word. */ +# ifdef EBCDIC + && *s == 0xFE && e - s >= UTF8_MAXBYTES + && s[1] >= 0x49 +# else + && *s == 0xFF && e -s >= UTF8_MAXBYTES + && s[2] >= 0x88 +# endif +#else /* Below is 32-bit words */ + /* 2**31 and above meet these conditions on all EBCDIC + * pages recognized for 32-bit platforms */ +# ifdef EBCDIC + && *s == 0xFE && e - s >= UTF8_MAXBYTES + && s[6] >= 0x43 +# else + && *s >= 0xFE +# endif +#endif + )) { /* A side effect of this function will be to warn */ (void) utf8n_to_uvchr(s, e - s, &char_len, UTF8_WARN_SUPER); - ok = FALSE; - } + ok = FALSE; + } } else if (UTF8_IS_SURROGATE(s, e)) { if (ckWARN_d(WARN_SURROGATE)) { diff --git a/utf8.h b/utf8.h index 4f01277..c57576b 100644 --- a/utf8.h +++ b/utf8.h @@ -136,8 +136,8 @@ END_EXTERN_C /* Native character to/from iso-8859-1. Are the identity functions on ASCII * platforms */ -#define NATIVE_TO_LATIN1(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch)) -#define LATIN1_TO_NATIVE(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch)) +#define NATIVE_TO_LATIN1(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) (ch))) +#define LATIN1_TO_NATIVE(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) (ch))) /* I8 is an intermediate version of UTF-8 used only in UTF-EBCDIC. We thus * consider it to be identical to UTF-8 on ASCII platforms. Strictly speaking @@ -145,12 +145,12 @@ END_EXTERN_C * because they are 8-bit encodings that serve the same purpose in Perl, and * rarely do we need to distinguish them. The term "NATIVE_UTF8" applies to * whichever one is applicable on the current platform */ -#define NATIVE_UTF8_TO_I8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch)) -#define I8_TO_NATIVE_UTF8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch)) +#define NATIVE_UTF8_TO_I8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) (ch))) +#define I8_TO_NATIVE_UTF8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) (ch))) /* Transforms in wide UV chars */ -#define UNI_TO_NATIVE(ch) (ch) -#define NATIVE_TO_UNI(ch) (ch) +#define UNI_TO_NATIVE(ch) ((UV) (ch)) +#define NATIVE_TO_UNI(ch) ((UV) (ch)) /* @@ -208,18 +208,26 @@ Perl's extended UTF-8 means we can have start bytes up to FF. * */ #define UVCHR_IS_INVARIANT(cp) OFFUNI_IS_INVARIANT(cp) +/* This defines the bits that are to be in the continuation bytes of a multi-byte + * UTF-8 encoded character that mark it is a continuation byte. */ +#define UTF_CONTINUATION_MARK 0x80 + /* Misleadingly named: is the UTF8-encoded byte 'c' part of a variant sequence * in UTF-8? This is the inverse of UTF8_IS_INVARIANT */ -#define UTF8_IS_CONTINUED(c) (((U8)c) & 0x80) +#define UTF8_IS_CONTINUED(c) (((U8)c) & UTF_CONTINUATION_MARK) /* Is the byte 'c' the first byte of a multi-byte UTF8-8 encoded sequence? * This doesn't catch invariants (they are single-byte). It also excludes the * illegal overlong sequences that begin with C0 and C1. */ #define UTF8_IS_START(c) (((U8)c) >= 0xc2) +/* For use in UTF8_IS_CONTINUATION() below */ +#define UTF_IS_CONTINUATION_MASK 0xC0 + /* Is the byte 'c' part of a multi-byte UTF8-8 encoded sequence, and not the * first byte thereof? */ -#define UTF8_IS_CONTINUATION(c) ((((U8)c) & 0xC0) == 0x80) +#define UTF8_IS_CONTINUATION(c) \ + ((((U8)c) & UTF_IS_CONTINUATION_MASK) == UTF_CONTINUATION_MARK) /* Is the UTF8-encoded byte 'c' the first byte of a two byte sequence? Use * UTF8_IS_NEXT_CHAR_DOWNGRADEABLE() instead if the input isn't known to @@ -231,67 +239,73 @@ Perl's extended UTF-8 means we can have start bytes up to FF. * represent a code point > 255? */ #define UTF8_IS_ABOVE_LATIN1(c) ((U8)(c) >= 0xc4) -/* This defines the 1-bits that are to be in the first byte of a multi-byte - * UTF-8 encoded character that give the number of bytes that comprise the - * character. 'len' is the number of bytes in the multi-byte sequence. */ -#define UTF_START_MARK(len) (((len) > 7) ? 0xFF : (0xFF & (0xFE << (7-(len))))) - -/* Masks out the initial one bits in a start byte, leaving the real data ones. - * Doesn't work on an invariant byte. 'len' is the number of bytes in the - * multi-byte sequence that comprises the character. */ -#define UTF_START_MASK(len) (((len) >= 7) ? 0x00 : (0x1F >> ((len)-2))) - -/* This defines the bits that are to be in the continuation bytes of a multi-byte - * UTF-8 encoded character that indicate it is a continuation byte. */ -#define UTF_CONTINUATION_MARK 0x80 - /* This is the number of low-order bits a continuation byte in a UTF-8 encoded * sequence contributes to the specification of the code point. In the bit * maps above, you see that the first 2 bits are a constant '10', leaving 6 of * real information */ #define UTF_ACCUMULATION_SHIFT 6 -/* 2**UTF_ACCUMULATION_SHIFT - 1 */ -#define UTF_CONTINUATION_MASK ((U8)0x3f) +/* ^? is defined to be DEL on ASCII systems. See the definition of toCTRL() + * for more */ +#define QUESTION_MARK_CTRL DEL_NATIVE -#if UVSIZE >= 8 -# define UTF8_QUAD_MAX UINT64_C(0x1000000000) +/* Surrogates, non-character code points and above-Unicode code points are + * problematic in some contexts. This allows code that needs to check for + * those to to quickly exclude the vast majority of code points it will + * encounter */ +#define isUTF8_POSSIBLY_PROBLEMATIC(c) ((U8) c >= 0xED) -/* Input is a true Unicode (not-native) code point */ -#define OFFUNISKIP(uv) ( (uv) < 0x80 ? 1 : \ - (uv) < 0x800 ? 2 : \ - (uv) < 0x10000 ? 3 : \ - (uv) < 0x200000 ? 4 : \ - (uv) < 0x4000000 ? 5 : \ - (uv) < 0x80000000 ? 6 : \ - (uv) < UTF8_QUAD_MAX ? 7 : UTF8_MAXBYTES ) -#else -/* No, I'm not even going to *TRY* putting #ifdef inside a #define */ -#define OFFUNISKIP(uv) ( (uv) < 0x80 ? 1 : \ - (uv) < 0x800 ? 2 : \ - (uv) < 0x10000 ? 3 : \ - (uv) < 0x200000 ? 4 : \ - (uv) < 0x4000000 ? 5 : \ - (uv) < 0x80000000 ? 6 : 7 ) -#endif +#endif /* EBCDIC vs ASCII */ -/* The maximum number of UTF-8 bytes a single Unicode character can - * uppercase/lowercase/fold into. Unicode guarantees that the maximum - * expansion is 3 characters. On ASCIIish platforms, the highest Unicode - * character occupies 4 bytes, therefore this number would be 12, but this is - * smaller than the maximum width a single above-Unicode character can occupy, - * so use that instead */ -#if UTF8_MAXBYTES < 12 -#error UTF8_MAXBYTES must be at least 12 +/* 2**UTF_ACCUMULATION_SHIFT - 1 */ +#define UTF_CONTINUATION_MASK ((U8) ((1U << UTF_ACCUMULATION_SHIFT) - 1)) + +/* Internal macro to be used only in this file to aid in constructing other + * publicly accessible macros. + * The number of bytes required to express this uv in UTF-8, for just those + * uv's requiring 2 through 6 bytes, as these are common to all platforms and + * word sizes. The number of bytes needed is given by the number of leading 1 + * bits in the start byte. There are 32 start bytes that have 2 initial 1 bits + * (C0-DF); there are 16 that have 3 initial 1 bits (E0-EF); 8 that have 4 + * initial 1 bits (F0-F8); 4 that have 5 initial 1 bits (F9-FB), and 2 that + * have 6 initial 1 bits (FC-FD). The largest number a string of n bytes can + * represent is (the number of possible start bytes for 'n') + * * (the number of possiblities for each start byte + * The latter in turn is + * 2 ** ( (how many continuation bytes there are) + * * (the number of bits of information each + * continuation byte holds)) + * + * If we were on a platform where we could use a fast find first set bit + * instruction (or count leading zeros instruction) this could be replaced by + * using that to find the log2 of the uv, and divide that by the number of bits + * of information in each continuation byte, adjusting for large cases and how + * much information is in a start byte for that length */ +#define __COMMON_UNI_SKIP(uv) \ + (UV) (uv) < (32 * (1U << ( UTF_ACCUMULATION_SHIFT))) ? 2 : \ + (UV) (uv) < (16 * (1U << (2 * UTF_ACCUMULATION_SHIFT))) ? 3 : \ + (UV) (uv) < ( 8 * (1U << (3 * UTF_ACCUMULATION_SHIFT))) ? 4 : \ + (UV) (uv) < ( 4 * (1U << (4 * UTF_ACCUMULATION_SHIFT))) ? 5 : \ + (UV) (uv) < ( 2 * (1U << (5 * UTF_ACCUMULATION_SHIFT))) ? 6 : + +/* Internal macro to be used only in this file. + * This adds to __COMMON_UNI_SKIP the details at this platform's upper range. + * For any-sized EBCDIC platforms, or 64-bit ASCII ones, we need one more test + * to see if just 7 bytes is needed, or if the maximum is needed. For 32-bit + * ASCII platforms, everything is representable by 7 bytes */ +#if defined(UV_IS_QUAD) || defined(EBCDIC) +# define __BASE_UNI_SKIP(uv) (__COMMON_UNI_SKIP(uv) \ + (UV) (uv) < ((UV) 1U << (6 * UTF_ACCUMULATION_SHIFT)) ? 7 : UTF8_MAXBYTES) +#else +# define __BASE_UNI_SKIP(uv) (__COMMON_UNI_SKIP(uv) 7) #endif -/* ^? is defined to be DEL on ASCII systems. See the definition of toCTRL() - * for more */ -#define QUESTION_MARK_CTRL DEL_NATIVE - -#define MAX_UTF8_TWO_BYTE 0x7FF +/* The next two macros use the base macro defined above, and add in the tests + * at the low-end of the range, for just 1 byte, yielding complete macros, + * publicly accessible. */ -#define UTF8_MAXBYTES_CASE UTF8_MAXBYTES +/* Input is a true Unicode (not-native) code point */ +#define OFFUNISKIP(uv) (OFFUNI_IS_INVARIANT(uv) ? 1 : __BASE_UNI_SKIP(uv)) /* @@ -302,15 +316,27 @@ encoded as UTF-8. C is a native (ASCII or EBCDIC) code point if less than =cut */ -#define UVCHR_SKIP(uv) OFFUNISKIP(uv) +#define UVCHR_SKIP(uv) ( UVCHR_IS_INVARIANT(uv) ? 1 : __BASE_UNI_SKIP(uv)) -/* Surrogates, non-character code points and above-Unicode code points are - * problematic in some contexts. This allows code that needs to check for - * those to to quickly exclude the vast majority of code points it will - * encounter */ -#define isUTF8_POSSIBLY_PROBLEMATIC(c) ((U8) c >= 0xED) +/* As explained in the comments for __COMMON_UNI_SKIP, 32 start bytes with + * UTF_ACCUMULATION_SHIFT bits of information each */ +#define MAX_UTF8_TWO_BYTE (32 * (1U << UTF_ACCUMULATION_SHIFT) - 1) -#endif /* EBCDIC vs ASCII */ +/* constrained by EBCDIC which has 5 bits per continuation byte */ +#define MAX_PORTABLE_UTF8_TWO_BYTE (32 * (1U << 5) - 1) + +/* The maximum number of UTF-8 bytes a single Unicode character can + * uppercase/lowercase/fold into. Unicode guarantees that the maximum + * expansion is UTF8_MAX_FOLD_CHAR_EXPAND characters, but any above-Unicode + * code point will fold to itself, so we only have to look at the expansion of + * the maximum Unicode code point. But this number may be less than the space + * occupied by a very large code point under Perl's extended UTF-8. We have to + * make it large enough to fit any single character. (It turns out that ASCII + * and EBCDIC differ in which is larger) */ +#define UTF8_MAXBYTES_CASE \ + (UTF8_MAXBYTES >= (UTF8_MAX_FOLD_CHAR_EXPAND * OFFUNISKIP(0x10FFFF)) \ + ? UTF8_MAXBYTES \ + : (UTF8_MAX_FOLD_CHAR_EXPAND * OFFUNISKIP(0x10FFFF))) /* Rest of these are attributes of Unicode and perl's internals rather than the * encoding, or happen to be the same in both ASCII and EBCDIC (at least at @@ -329,6 +355,17 @@ encoded as UTF-8. C is a native (ASCII or EBCDIC) code point if less than #define I8_TO_NATIVE(ch) I8_TO_NATIVE_UTF8(ch) #define NATIVE8_TO_UNI(ch) NATIVE_TO_LATIN1(ch) +/* This defines the 1-bits that are to be in the first byte of a multi-byte + * UTF-8 encoded character that mark it as a start byte and give the number of + * bytes that comprise the character. 'len' is the number of bytes in the + * multi-byte sequence. */ +#define UTF_START_MARK(len) (((len) > 7) ? 0xFF : (0xFF & (0xFE << (7-(len))))) + +/* Masks out the initial one bits in a start byte, leaving the real data ones. + * Doesn't work on an invariant byte. 'len' is the number of bytes in the + * multi-byte sequence that comprises the character. */ +#define UTF_START_MASK(len) (((len) >= 7) ? 0x00 : (0x1F >> ((len)-2))) + /* Adds a UTF8 continuation byte 'new' of information to a running total code * point 'old' of all the continuation bytes so far. This is designed to be * used in a loop to convert from UTF-8 to the code point represented. Note @@ -384,7 +421,7 @@ encoded as UTF-8. C is a native (ASCII or EBCDIC) code point if less than * Note that the result can be larger than 255 if the input character is not * downgradable */ #define TWO_BYTE_UTF8_TO_NATIVE(HI, LO) \ - ( __ASSERT_(UTF8SKIP(HI) == 2) \ + ( __ASSERT_(PL_utf8skip[HI] == 2) \ __ASSERT_(UTF8_IS_CONTINUATION(LO)) \ UNI_TO_NATIVE(UTF8_ACCUMULATE((NATIVE_UTF8_TO_I8(HI) & UTF_START_MASK(2)), \ (LO)))) @@ -419,8 +456,6 @@ only) byte is pointed to by C. * above show, doesn't matter as to its implementation */ #define NATIVE_BYTE_IS_INVARIANT(c) UVCHR_IS_INVARIANT(c) -#define MAX_PORTABLE_UTF8_TWO_BYTE 0x3FF /* constrained by EBCDIC */ - /* The macros in the next 4 sets are used to generate the two utf8 or utfebcdic * bytes from an ordinal that is known to fit into exactly two (not one) bytes; * it must be less than 0x3FF to work across both encodings. */ @@ -445,9 +480,9 @@ only) byte is pointed to by C. /* The next two macros are used when the source should be a single byte * character; checked for under DEBUGGING */ #define UTF8_EIGHT_BIT_HI(c) (__ASSERT_(FITS_IN_8_BITS(c)) \ - ((U8) __BASE_TWO_BYTE_HI(c, NATIVE_TO_LATIN1))) + ( __BASE_TWO_BYTE_HI(c, NATIVE_TO_LATIN1))) #define UTF8_EIGHT_BIT_LO(c) (__ASSERT_(FITS_IN_8_BITS(c)) \ - ((U8) __BASE_TWO_BYTE_LO(c, NATIVE_TO_LATIN1))) + (__BASE_TWO_BYTE_LO(c, NATIVE_TO_LATIN1))) /* These final two macros in the series are used when the source can be any * code point whose UTF-8 is known to occupy 2 bytes; they are less efficient @@ -458,11 +493,11 @@ only) byte is pointed to by C. #define UTF8_TWO_BYTE_HI(c) \ (__ASSERT_((sizeof(c) == 1) \ || !(((WIDEST_UTYPE)(c)) & ~MAX_UTF8_TWO_BYTE)) \ - ((U8) __BASE_TWO_BYTE_HI(c, NATIVE_TO_UNI))) + (__BASE_TWO_BYTE_HI(c, NATIVE_TO_UNI))) #define UTF8_TWO_BYTE_LO(c) \ (__ASSERT_((sizeof(c) == 1) \ || !(((WIDEST_UTYPE)(c)) & ~MAX_UTF8_TWO_BYTE)) \ - ((U8) __BASE_TWO_BYTE_LO(c, NATIVE_TO_UNI))) + (__BASE_TWO_BYTE_LO(c, NATIVE_TO_UNI))) /* This is illegal in any well-formed UTF-8 in both EBCDIC and ASCII * as it is only in overlongs. */ @@ -475,11 +510,11 @@ only) byte is pointed to by C. * beginning of a utf8 character. Now that foo_utf8() determines that itself, * no need to do it again here */ -#define isIDFIRST_lazy_if(p,UTF) ((IN_BYTES || !UTF ) \ - ? isIDFIRST(*(p)) \ +#define isIDFIRST_lazy_if(p,UTF) ((IN_BYTES || !UTF) \ + ? isIDFIRST(*(p)) \ : isIDFIRST_utf8((const U8*)p)) -#define isWORDCHAR_lazy_if(p,UTF) ((IN_BYTES || (!UTF )) \ - ? isWORDCHAR(*(p)) \ +#define isWORDCHAR_lazy_if(p,UTF) ((IN_BYTES || (!UTF)) \ + ? isWORDCHAR(*(p)) \ : isWORDCHAR_utf8((const U8*)p)) #define isALNUM_lazy_if(p,UTF) isWORDCHAR_lazy_if(p,UTF) @@ -538,12 +573,16 @@ case any call to string overloading updates the internal UTF-8 encoding flag. #define UTF8_DISALLOW_SUPER 0x0200 /* Super-set of Unicode: code */ #define UTF8_WARN_SUPER 0x0400 /* points above the legal max */ -/* Code points which never were part of the original UTF-8 standard, the first - * byte of which is a FE or FF on ASCII platforms. If the first byte is FF, it - * will overflow a 32-bit word. If the first byte is FE, it will overflow a - * signed 32-bit word. */ -#define UTF8_DISALLOW_FE_FF 0x0800 -#define UTF8_WARN_FE_FF 0x1000 +/* Code points which never were part of the original UTF-8 standard, which only + * went up to 2 ** 31 - 1. Note that these all overflow a signed 32-bit word, + * The first byte of these code points is FE or FF on ASCII platforms. If the + * first byte is FF, it will overflow a 32-bit word. */ +#define UTF8_DISALLOW_ABOVE_31_BIT 0x0800 +#define UTF8_WARN_ABOVE_31_BIT 0x1000 + +/* For back compat, these old names are misleading for UTF_EBCDIC */ +#define UTF8_DISALLOW_FE_FF UTF8_DISALLOW_ABOVE_31_BIT +#define UTF8_WARN_FE_FF UTF8_WARN_ABOVE_31_BIT #define UTF8_CHECK_ONLY 0x2000 @@ -553,13 +592,14 @@ case any call to string overloading updates the internal UTF-8 encoding flag. #define UTF8_ALLOW_FFFF 0 #define UTF8_ALLOW_SURROGATE 0 -#define UTF8_DISALLOW_ILLEGAL_INTERCHANGE \ - (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_NONCHAR \ - |UTF8_DISALLOW_SURROGATE|UTF8_DISALLOW_FE_FF) +#define UTF8_DISALLOW_ILLEGAL_INTERCHANGE \ + ( UTF8_DISALLOW_SUPER|UTF8_DISALLOW_NONCHAR \ + |UTF8_DISALLOW_SURROGATE) #define UTF8_WARN_ILLEGAL_INTERCHANGE \ - (UTF8_WARN_SUPER|UTF8_WARN_NONCHAR|UTF8_WARN_SURROGATE|UTF8_WARN_FE_FF) -#define UTF8_ALLOW_ANY \ - (~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_WARN_ILLEGAL_INTERCHANGE)) + (UTF8_WARN_SUPER|UTF8_WARN_NONCHAR|UTF8_WARN_SURROGATE) +#define UTF8_ALLOW_ANY \ + (~( UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_DISALLOW_ABOVE_31_BIT \ + |UTF8_WARN_ILLEGAL_INTERCHANGE|UTF8_WARN_ABOVE_31_BIT)) #define UTF8_ALLOW_ANYUV \ (UTF8_ALLOW_EMPTY \ & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_WARN_ILLEGAL_INTERCHANGE)) @@ -605,14 +645,14 @@ case any call to string overloading updates the internal UTF-8 encoding flag. * let's be conservative and do as Unicode says. */ #define PERL_UNICODE_MAX 0x10FFFF -#define UNICODE_WARN_SURROGATE 0x0001 /* UTF-16 surrogates */ -#define UNICODE_WARN_NONCHAR 0x0002 /* Non-char code points */ -#define UNICODE_WARN_SUPER 0x0004 /* Above 0x10FFFF */ -#define UNICODE_WARN_FE_FF 0x0008 /* Above 0x10FFFF */ -#define UNICODE_DISALLOW_SURROGATE 0x0010 -#define UNICODE_DISALLOW_NONCHAR 0x0020 -#define UNICODE_DISALLOW_SUPER 0x0040 -#define UNICODE_DISALLOW_FE_FF 0x0080 +#define UNICODE_WARN_SURROGATE 0x0001 /* UTF-16 surrogates */ +#define UNICODE_WARN_NONCHAR 0x0002 /* Non-char code points */ +#define UNICODE_WARN_SUPER 0x0004 /* Above 0x10FFFF */ +#define UNICODE_WARN_ABOVE_31_BIT 0x0008 /* Above 0x7FFF_FFFF */ +#define UNICODE_DISALLOW_SURROGATE 0x0010 +#define UNICODE_DISALLOW_NONCHAR 0x0020 +#define UNICODE_DISALLOW_SUPER 0x0040 +#define UNICODE_DISALLOW_ABOVE_31_BIT 0x0080 #define UNICODE_WARN_ILLEGAL_INTERCHANGE \ (UNICODE_WARN_SURROGATE|UNICODE_WARN_NONCHAR|UNICODE_WARN_SUPER) #define UNICODE_DISALLOW_ILLEGAL_INTERCHANGE \ @@ -623,19 +663,31 @@ case any call to string overloading updates the internal UTF-8 encoding flag. #define UNICODE_ALLOW_SUPER 0 #define UNICODE_ALLOW_ANY 0 -#define UNICODE_IS_SURROGATE(c) ((c) >= UNICODE_SURROGATE_FIRST && \ - (c) <= UNICODE_SURROGATE_LAST) -#define UNICODE_IS_REPLACEMENT(c) ((c) == UNICODE_REPLACEMENT) -#define UNICODE_IS_BYTE_ORDER_MARK(c) ((c) == UNICODE_BYTE_ORDER_MARK) -#define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) \ - /* The other noncharacters end in FFFE or FFFF, which \ - * the mask below catches both of, but beyond the last \ - * official unicode code point, they aren't \ - * noncharacters, since those aren't Unicode \ - * characters at all */ \ - || ((((c & 0xFFFE) == 0xFFFE)) && ! UNICODE_IS_SUPER(c))) -#define UNICODE_IS_SUPER(c) ((c) > PERL_UNICODE_MAX) -#define UNICODE_IS_FE_FF(c) ((c) > 0x7FFFFFFF) +/* This matches the 2048 code points between UNICODE_SURROGATE_FIRST (0xD800) and + * UNICODE_SURROGATE_LAST (0xDFFF) */ +#define UNICODE_IS_SURROGATE(uv) (((UV) (uv) & (~0xFFFF | 0xF800)) \ + == 0xD800) + +#define UNICODE_IS_REPLACEMENT(uv) ((UV) (uv) == UNICODE_REPLACEMENT) +#define UNICODE_IS_BYTE_ORDER_MARK(uv) ((UV) (uv) == UNICODE_BYTE_ORDER_MARK) + +/* Is 'uv' one of the 32 contiguous-range noncharacters? */ +#define UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv) ((UV) (uv) >= 0xFDD0 \ + && (UV) (uv) <= 0xFDEF) + +/* Is 'uv' one of the 34 plane-ending noncharacters 0xFFFE, 0xFFFF, 0x1FFFE, + * 0x1FFFF, ... 0x10FFFE, 0x10FFFF, given that we know that 'uv' is not above + * the Unicode legal max */ +#define UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv) \ + (((UV) (uv) & 0xFFFE) == 0xFFFE) + +#define UNICODE_IS_NONCHAR(uv) \ + ( UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv) \ + || ( LIKELY( ! UNICODE_IS_SUPER(uv)) \ + && UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv))) + +#define UNICODE_IS_SUPER(uv) ((UV) (uv) > PERL_UNICODE_MAX) +#define UNICODE_IS_ABOVE_31_BIT(uv) ((UV) (uv) > 0x7FFFFFFF) #define LATIN_SMALL_LETTER_SHARP_S LATIN_SMALL_LETTER_SHARP_S_NATIVE #define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS \ diff --git a/utfebcdic.h b/utfebcdic.h index 1e4dc7c..10b666a 100644 --- a/utfebcdic.h +++ b/utfebcdic.h @@ -27,17 +27,25 @@ * invariant byte starts with 0 starts with 0 or 100 * continuation byte starts with 10 starts with 101 * start byte same in both: if the code point requires N bytes, - * then the leading N bits are 1, followed by a 0. (No - * trailing 0 for the very largest possible allocation - * in I8, far beyond the current Unicode standard's - * max, as shown in the comment later in this file.) + * then the leading N bits are 1, followed by a 0. If + * all 8 bits in the first byte are 1, the code point + * will occupy 14 bytes (compared to 13 in Perl's + * extended UTF-8). This is incompatible with what + * tr16 implies should be the representation of code + * points 2**30 and above, but allows Perl to be able + * to represent all code points that fit in a 64-bit + * word in either our extended UTF-EBCDIC or UTF-8. * 3) Use the algorithm in tr16 to convert each byte from step 2 into * final UTF-EBCDIC. This is done by table lookup from a table * constructed from the algorithm, reproduced in ebcdic_tables.h as * PL_utf2e, with its inverse being PL_e2utf. They are constructed so that * all EBCDIC invariants remain invariant, but no others do, and the first * byte of a variant will always have its upper bit set. But note that - * the upper bit of some invariants is also 1. + * the upper bit of some invariants is also 1. The table also is designed + * so that lexically comparing two UTF-EBCDIC-variant characters yields + * the Unicode code point order. (To get native code point order, one has + * to convert the latin1-range characters to their native code point + * value.) * * For example, the ordinal value of 'A' is 193 in EBCDIC, and also is 193 in * UTF-EBCDIC. Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3 @@ -142,20 +150,24 @@ END_EXTERN_C #define I8_TO_NATIVE_UTF8(b) (__ASSERT_(FITS_IN_8_BITS(b)) PL_utf2e[(U8)(b)]) /* Transforms in wide UV chars */ -#define NATIVE_TO_UNI(ch) (FITS_IN_8_BITS(ch) ? NATIVE_TO_LATIN1(ch) : (ch)) -#define UNI_TO_NATIVE(ch) (FITS_IN_8_BITS(ch) ? LATIN1_TO_NATIVE(ch) : (ch)) +#define NATIVE_TO_UNI(ch) (FITS_IN_8_BITS(ch) ? NATIVE_TO_LATIN1(ch) : (UV) (ch)) +#define UNI_TO_NATIVE(ch) (FITS_IN_8_BITS(ch) ? LATIN1_TO_NATIVE(ch) : (UV) (ch)) /* How wide can a single UTF-8 encoded character become in bytes. */ /* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8 * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be * expressed with 5 bytes. However, Perl thinks of UTF-8 as a way to encode - * non-negative integers in a binary format, even those above Unicode. */ -#define UTF8_MAXBYTES 7 + * non-negative integers in a binary format, even those above Unicode. 14 is + * the smallest number that covers 2**64 + * + * WARNING: This number must be in sync with the value in + * regen/charset_translations.pl. */ +#define UTF8_MAXBYTES 14 /* - The following table is adapted from tr16, it shows I8 encoding of Unicode code points. + The following table is adapted from tr16, it shows the I8 encoding of Unicode code points. - Unicode U32 Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th byte + Unicode U32 Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th Byte U+0000..U+007F 000000000xxxxxxx 0xxxxxxx U+0080..U+009F 00000000100xxxxx 100xxxxx U+00A0..U+03FF 000000yyyyyxxxxx 110yyyyy 101xxxxx @@ -163,22 +175,20 @@ END_EXTERN_C U+4000..U+3FFFF 0wwwzzzzzyyyyyxxxxx 11110www 101zzzzz 101yyyyy 101xxxxx U+40000..U+3FFFFF 0vvwwwwwzzzzzyyyyyxxxxx 111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx U+400000..U+3FFFFFF 0uvvvvvwwwwwzzzzzyyyyyxxxxx 1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx - U+4000000..U+7FFFFFFF 0tuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 1111111t 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx + U+4000000..U+3FFFFFFF 00uuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 11111110 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx - Note: The I8 transformation is valid for UCS-4 values X'0' to - X'7FFFFFFF' (the full extent of ISO/IEC 10646 coding space). +Beyond this, Perl uses an incompatible extension, similar to the one used in +regular UTF-8. There are now 14 bytes. A full 32 bits of information thus looks like this: + 1st Byte 2nd-7th 8th Byte 9th Byte 10th B 11th B 12th B 13th B 14th B +U+40000000..U+FFFFFFFF ttuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 11111111 10100000 101000tt 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx - */ +For 32-bit words, the 2nd through 7th bytes effectively function as leading +zeros. Above 32 bits, these fill up, with each byte yielding 5 bits of +information, so that with 13 continuation bytes, we can handle 65 bits, just +above what a 64 bit word can hold */ -/* Input is a true Unicode (not-native) code point */ -#define OFFUNISKIP(uv) ( (uv) < 0xA0 ? 1 : \ - (uv) < 0x400 ? 2 : \ - (uv) < 0x4000 ? 3 : \ - (uv) < 0x40000 ? 4 : \ - (uv) < 0x400000 ? 5 : \ - (uv) < 0x4000000 ? 6 : \ - (uv) < 0x40000000 ? 7 : UTF8_MAXBYTES ) +/* This is a fundamental property of UTF-EBCDIC */ #define OFFUNI_IS_INVARIANT(c) (((UV)(c)) < 0xA0) /* It turns out that on EBCDIC platforms, the invariants are the characters @@ -188,13 +198,6 @@ END_EXTERN_C #define UVCHR_IS_INVARIANT(uv) cBOOL(FITS_IN_8_BITS(uv) \ && (PL_charclass[(U8) (uv)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL)))) -#define UVCHR_SKIP(uv) (UVCHR_IS_INVARIANT(uv) ? 1 : \ - (uv) < 0x400 ? 2 : \ - (uv) < 0x4000 ? 3 : \ - (uv) < 0x40000 ? 4 : \ - (uv) < 0x400000 ? 5 : \ - (uv) < 0x4000000 ? 6 : UTF8_MAXBYTES ) - /* UTF-EBCDIC semantic macros - We used to transform back into I8 and then * compare, but now only have to do a single lookup by using a bit in * l1_char_class_tab.h. @@ -202,8 +205,17 @@ END_EXTERN_C * definitions. */ #define UTF8_IS_START(c) _generic_isCC(c, _CC_UTF8_IS_START) + +#define UTF_IS_CONTINUATION_MASK 0xE0 + #define UTF8_IS_CONTINUATION(c) _generic_isCC(c, _CC_UTF8_IS_CONTINUATION) +/* The above instead could be written as this: +#define UTF8_IS_CONTINUATION(c) \ + (((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK) \ + == UTF_CONTINUATION_MARK) + */ + /* Equivalent to ! UVCHR_IS_INVARIANT(c) */ #define UTF8_IS_CONTINUED(c) cBOOL(FITS_IN_8_BITS(c) \ && ! (PL_charclass[(U8) (c)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL)))) @@ -221,26 +233,13 @@ END_EXTERN_C #define isUTF8_POSSIBLY_PROBLEMATIC(c) \ _generic_isCC(c, _CC_UTF8_START_BYTE_IS_FOR_AT_LEAST_SURROGATE) -/* Can't exceed 7 on EBCDIC platforms */ -#define UTF_START_MARK(len) (0xFF & (0xFE << (7-(len)))) - -#define UTF_START_MASK(len) (((len) >= 6) ? 0x01 : (0x1F >> ((len)-2))) #define UTF_CONTINUATION_MARK 0xA0 -#define UTF_CONTINUATION_MASK ((U8)0x1f) #define UTF_ACCUMULATION_SHIFT 5 -/* The maximum number of UTF-8 bytes a single Unicode character can - * uppercase/lowercase/fold into. Unicode guarantees that the maximum - * expansion is 3 characters. On EBCDIC platforms, the highest Unicode - * character occupies 5 bytes, therefore this number is 15 */ -#define UTF8_MAXBYTES_CASE 15 - /* ^? is defined to be APC on EBCDIC systems. See the definition of toCTRL() * for more */ #define QUESTION_MARK_CTRL LATIN1_TO_NATIVE(0x9F) -#define MAX_UTF8_TWO_BYTE 0x3FF - /* * ex: set ts=8 sts=4 sw=4 et: */ diff --git a/util.c b/util.c index ab468fe..17b62dd 100644 --- a/util.c +++ b/util.c @@ -2714,6 +2714,15 @@ Perl_my_popen(pTHX_ const char *cmd, const char *mode) /* this is called in parent before the fork() */ void Perl_atfork_lock(void) +#if defined(USE_ITHREADS) +# ifdef USE_PERLIO + PERL_TSA_ACQUIRE(PL_perlio_mutex) +# endif +# ifdef MYMALLOC + PERL_TSA_ACQUIRE(PL_malloc_mutex) +# endif + PERL_TSA_ACQUIRE(PL_op_mutex) +#endif { #if defined(USE_ITHREADS) dVAR; @@ -2731,6 +2740,15 @@ Perl_atfork_lock(void) /* this is called in both parent and child after the fork() */ void Perl_atfork_unlock(void) +#if defined(USE_ITHREADS) +# ifdef USE_PERLIO + PERL_TSA_RELEASE(PL_perlio_mutex) +# endif +# ifdef MYMALLOC + PERL_TSA_RELEASE(PL_malloc_mutex) +# endif + PERL_TSA_RELEASE(PL_op_mutex) +#endif { #if defined(USE_ITHREADS) dVAR; @@ -4520,6 +4538,9 @@ Perl_parse_unicode_opts(pTHX_ const char **popt) Perl_croak(aTHX_ "Unknown Unicode option letter '%c'", *p); } } + else { + Perl_croak(aTHX_ "Invalid number '%s' for -C option.\n", p); + } } else { for (; *p; p++) { @@ -6577,6 +6598,28 @@ Perl_dump_c_backtrace(pTHX_ PerlIO* fp, int depth, int skip) #endif /* #ifdef USE_C_BACKTRACE */ +#ifdef PERL_TSA_ACTIVE + +/* pthread_mutex_t and perl_mutex are typedef equivalent + * so casting the pointers is fine. */ + +int perl_tsa_mutex_lock(perl_mutex* mutex) +{ + return pthread_mutex_lock((pthread_mutex_t *) mutex); +} + +int perl_tsa_mutex_unlock(perl_mutex* mutex) +{ + return pthread_mutex_unlock((pthread_mutex_t *) mutex); +} + +int perl_tsa_mutex_destroy(perl_mutex* mutex) +{ + return pthread_mutex_destroy((pthread_mutex_t *) mutex); +} + +#endif + /* * ex: set ts=8 sts=4 sw=4 et: */ diff --git a/vms/descrip_mms.template b/vms/descrip_mms.template index b59037c..944d202 100644 --- a/vms/descrip_mms.template +++ b/vms/descrip_mms.template @@ -307,7 +307,7 @@ utils : $(utils1) $(utils2) $(utils3) $(utils4) $(utils5) extra.pods : miniperl @ @extra_pods.com -PERLDELTA_CURRENT = [.pod]perl5235delta.pod +PERLDELTA_CURRENT = [.pod]perl5236delta.pod $(PERLDELTA_CURRENT) : [.pod]perldelta.pod Copy/NoConfirm/Log $(MMS$SOURCE) $(PERLDELTA_CURRENT) diff --git a/win32/GNUmakefile b/win32/GNUmakefile index 120ead7..66c6415 100644 --- a/win32/GNUmakefile +++ b/win32/GNUmakefile @@ -66,7 +66,7 @@ INST_TOP := $(INST_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -#INST_VER := \5.23.5 +#INST_VER := \5.23.6 # # Comment this out if you DON'T want your perl installation to have @@ -1127,7 +1127,7 @@ utils: $(PERLEXE) ..\utils\Makefile copy ..\README.tw ..\pod\perltw.pod copy ..\README.vos ..\pod\perlvos.pod copy ..\README.win32 ..\pod\perlwin32.pod - copy ..\pod\perldelta.pod ..\pod\perl5235delta.pod + copy ..\pod\perldelta.pod ..\pod\perl5236delta.pod $(PERLEXE) $(PL2BAT) $(UTILS) $(MINIPERL) -I..\lib ..\autodoc.pl .. $(MINIPERL) -I..\lib ..\pod\perlmodlib.PL -q .. @@ -1222,7 +1222,7 @@ distclean: realclean -if exist $(LIBDIR)\Win32API rmdir /s /q $(LIBDIR)\Win32API -if exist $(LIBDIR)\XS rmdir /s /q $(LIBDIR)\XS -cd $(PODDIR) && del /f *.html *.bat roffitall \ - perl5235delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ + perl5236delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ perlapi.pod perlbs2000.pod perlce.pod perlcn.pod perlcygwin.pod \ perldos.pod perlfreebsd.pod perlhaiku.pod perlhpux.pod \ perlhurd.pod perlintern.pod perlirix.pod perljp.pod perlko.pod \ diff --git a/win32/Makefile b/win32/Makefile index 19142ba..a226aaf 100644 --- a/win32/Makefile +++ b/win32/Makefile @@ -38,7 +38,7 @@ INST_TOP = $(INST_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -#INST_VER = \5.23.5 +#INST_VER = \5.23.6 # # Comment this out if you DON'T want your perl installation to have @@ -1216,7 +1216,7 @@ utils: $(PERLEXE) ..\utils\Makefile copy ..\README.tw ..\pod\perltw.pod copy ..\README.vos ..\pod\perlvos.pod copy ..\README.win32 ..\pod\perlwin32.pod - copy ..\pod\perldelta.pod ..\pod\perl5235delta.pod + copy ..\pod\perldelta.pod ..\pod\perl5236delta.pod cd ..\win32 $(PERLEXE) $(PL2BAT) $(UTILS) $(MINIPERL) -I..\lib ..\autodoc.pl .. @@ -1313,7 +1313,7 @@ distclean: realclean -if exist $(LIBDIR)\Win32API rmdir /s /q $(LIBDIR)\Win32API -if exist $(LIBDIR)\XS rmdir /s /q $(LIBDIR)\XS -cd $(PODDIR) && del /f *.html *.bat roffitall \ - perl5235delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ + perl5236delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ perlapi.pod perlbs2000.pod perlce.pod perlcn.pod perlcygwin.pod \ perldos.pod perlfreebsd.pod perlhaiku.pod perlhpux.pod \ perlhurd.pod perlintern.pod perlirix.pod perljp.pod perlko.pod \ diff --git a/win32/config_H.gc b/win32/config_H.gc index 8480c77..2254974 100644 --- a/win32/config_H.gc +++ b/win32/config_H.gc @@ -1918,8 +1918,10 @@ * LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN * LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN * LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN - * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN - * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE * LONG_DOUBLE_IS_UNKNOWN_FORMAT * It is only defined if the system supports long doubles. */ @@ -1933,8 +1935,10 @@ #define LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN 2 #define LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN 3 #define LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN 4 -#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN 5 -#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN 6 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE 5 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE 6 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE 7 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE 8 #define LONG_DOUBLE_IS_UNKNOWN_FORMAT -1 #endif diff --git a/win32/config_H.vc b/win32/config_H.vc index f1e8517..3d17f63 100644 --- a/win32/config_H.vc +++ b/win32/config_H.vc @@ -1912,8 +1912,10 @@ * LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN * LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN * LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN - * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN - * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE + * LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE * LONG_DOUBLE_IS_UNKNOWN_FORMAT * It is only defined if the system supports long doubles. */ @@ -1927,8 +1929,10 @@ #define LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN 2 #define LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN 3 #define LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN 4 -#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN 5 -#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN 6 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_LE 5 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE 6 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LE_BE 7 +#define LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_LE 8 #define LONG_DOUBLE_IS_UNKNOWN_FORMAT -1 #endif diff --git a/win32/makefile.mk b/win32/makefile.mk index dece68a..ff697a4 100644 --- a/win32/makefile.mk +++ b/win32/makefile.mk @@ -44,7 +44,7 @@ INST_TOP *= $(INST_DRV)\perl # versioned installation can be obtained by setting INST_TOP above to a # path that includes an arbitrary version string. # -#INST_VER *= \5.23.5 +#INST_VER *= \5.23.6 # # Comment this out if you DON'T want your perl installation to have @@ -1539,7 +1539,7 @@ utils: $(HAVEMINIPERL) ..\utils\Makefile copy ..\README.tw ..\pod\perltw.pod copy ..\README.vos ..\pod\perlvos.pod copy ..\README.win32 ..\pod\perlwin32.pod - copy ..\pod\perldelta.pod ..\pod\perl5235delta.pod + copy ..\pod\perldelta.pod ..\pod\perl5236delta.pod $(MINIPERL) -I..\lib $(PL2BAT) $(UTILS) $(MINIPERL) -I..\lib ..\autodoc.pl .. $(MINIPERL) -I..\lib ..\pod\perlmodlib.PL -q .. @@ -1635,7 +1635,7 @@ distclean: realclean -if exist $(LIBDIR)\Win32API rmdir /s /q $(LIBDIR)\Win32API -if exist $(LIBDIR)\XS rmdir /s /q $(LIBDIR)\XS -cd $(PODDIR) && del /f *.html *.bat roffitall \ - perl5235delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ + perl5236delta.pod perlaix.pod perlamiga.pod perlandroid.pod \ perlapi.pod perlbs2000.pod perlce.pod perlcn.pod perlcygwin.pod \ perldos.pod perlfreebsd.pod perlhaiku.pod perlhpux.pod \ perlhurd.pod perlintern.pod perlirix.pod perljp.pod perlko.pod \ diff --git a/win32/perllib.c b/win32/perllib.c index 0e44a24..cf7bf56 100644 --- a/win32/perllib.c +++ b/win32/perllib.c @@ -211,14 +211,8 @@ RunPerl(int argc, char **argv, char **env) { int exitstatus; PerlInterpreter *my_perl, *new_perl = NULL; - char *arg0 = argv[0]; - char *ansi = NULL; bool use_environ = (env == environ); - WCHAR widename[MAX_PATH]; - GetModuleFileNameW(NULL, widename, sizeof(widename)/sizeof(WCHAR)); - argv[0] = ansi = win32_ansipath(widename); - #ifdef PERL_GLOBAL_STRUCT #define PERLVAR(prefix,var,type) /**/ #define PERLVARA(prefix,var,type) /**/ @@ -269,11 +263,6 @@ RunPerl(int argc, char **argv, char **env) } #endif - /* Some RTLs may want to free argv[] after main() returns. */ - argv[0] = arg0; - if (ansi) - win32_free(ansi); - PERL_SYS_TERM(); return (exitstatus); diff --git a/win32/pod.mak b/win32/pod.mak index 8079195..2099dd1 100644 --- a/win32/pod.mak +++ b/win32/pod.mak @@ -41,12 +41,14 @@ POD = perl.pod \ perl5202delta.pod \ perl5203delta.pod \ perl5220delta.pod \ + perl5221delta.pod \ perl5230delta.pod \ perl5231delta.pod \ perl5232delta.pod \ perl5233delta.pod \ perl5234delta.pod \ perl5235delta.pod \ + perl5236delta.pod \ perl561delta.pod \ perl56delta.pod \ perl581delta.pod \ @@ -182,12 +184,14 @@ MAN = perl.man \ perl5202delta.man \ perl5203delta.man \ perl5220delta.man \ + perl5221delta.man \ perl5230delta.man \ perl5231delta.man \ perl5232delta.man \ perl5233delta.man \ perl5234delta.man \ perl5235delta.man \ + perl5236delta.man \ perl561delta.man \ perl56delta.man \ perl581delta.man \ @@ -323,12 +327,14 @@ HTML = perl.html \ perl5202delta.html \ perl5203delta.html \ perl5220delta.html \ + perl5221delta.html \ perl5230delta.html \ perl5231delta.html \ perl5232delta.html \ perl5233delta.html \ perl5234delta.html \ perl5235delta.html \ + perl5236delta.html \ perl561delta.html \ perl56delta.html \ perl581delta.html \ @@ -464,12 +470,14 @@ TEX = perl.tex \ perl5202delta.tex \ perl5203delta.tex \ perl5220delta.tex \ + perl5221delta.tex \ perl5230delta.tex \ perl5231delta.tex \ perl5232delta.tex \ perl5233delta.tex \ perl5234delta.tex \ perl5235delta.tex \ + perl5236delta.tex \ perl561delta.tex \ perl56delta.tex \ perl581delta.tex \ -- 2.7.4