From 7ea3cd407b6ec2a3e424bdfbc486b6e01d6d28bd Mon Sep 17 00:00:00 2001 From: Nick Ing-Simmons Date: Tue, 19 Dec 2000 21:34:42 +0000 Subject: [PATCH] Integrate mainline. p4raw-id: //depot/perlio@8202 --- AUTHORS | 2 +- Changes | 906 +++++++++++++++++++++++++++++ Configure | 2 +- Porting/config.sh | 4 +- Porting/config_H | 152 ++--- config_h.SH | 150 ++--- configure.com | 7 +- djgpp/config.over | 4 +- embed.h | 14 +- embed.pl | 5 +- ext/DB_File/Changes | 11 + ext/DB_File/DB_File.pm | 6 +- ext/DB_File/DB_File.xs | 61 +- ext/DB_File/dbinfo | 6 +- ext/Fcntl/Fcntl.pm | 2 +- ext/Fcntl/Fcntl.xs | 254 ++++---- ext/Sys/Syslog/Syslog.pm | 4 +- global.sym | 2 + lib/Net/Ping.pm | 2 +- mg.c | 22 +- objXSUB.h | 4 + op.c | 6 + os2/OS2/ExtAttr/Makefile.PL | 2 +- os2/OS2/PrfDB/Makefile.PL | 2 +- os2/OS2/Process/Makefile.PL | 2 +- os2/OS2/REXX/DLL/Makefile.PL | 2 +- os2/OS2/REXX/Makefile.PL | 2 +- patchlevel.h | 2 +- perlapi.c | 16 + perlio.h | 6 +- perliol.h | 3 +- pod/perlapi.pod | 18 +- pod/perldiag.pod | 28 +- pod/perlfaq3.pod | 45 +- pod/perlintern.pod | 24 + pod/perlmodlib.pod | 26 +- pod/perltoc.pod | 455 +++++++++------ pp_ctl.c | 11 +- pp_hot.c | 27 +- pp_sys.c | 57 +- proto.h | 5 +- regcomp.c | 848 +++++++++++---------------- regcomp.h | 39 +- regcomp.sym | 19 - regexec.c | 1314 +++++++++++++++++++++--------------------- regnodes.h | 301 ++++------ sv.c | 2 - t/base/commonsense.t | 3 +- t/lib/glob-basic.t | 2 +- t/op/64bitint.t | 28 +- t/op/goto_xs.t | 20 +- t/op/utf8decode.t | 2 + t/pragma/utf8.t | 407 +++++++------ t/pragma/warn/pp_sys | 17 + uconfig.h | 178 +++--- uconfig.sh | 77 ++- utils/h2xs.PL | 2 +- vms/ext/DCLsym/Makefile.PL | 2 +- vms/ext/Stdio/Makefile.PL | 2 +- vms/gen_shrfls.pl | 2 +- 60 files changed, 3356 insertions(+), 2268 deletions(-) diff --git a/AUTHORS b/AUTHORS index de7a0e0..e3bc2af 100644 --- a/AUTHORS +++ b/AUTHORS @@ -32,7 +32,7 @@ Andrew Wilcox Andy Dougherty Anno Siegel Anthony David -Anton Berezin +Anton Berezin Art Green Artur Barrie Slaymaker diff --git a/Changes b/Changes index b2fba9e..3a873d3 100644 --- a/Changes +++ b/Changes @@ -32,6 +32,912 @@ Version v5.7.1 Development release working toward v5.8 -------------- ____________________________________________________________________________ +[ 8199] By: jhi on 2000/12/19 18:35:07 + Log: Microperl tweaks. + Branch: perl + ! sv.c uconfig.h uconfig.sh +____________________________________________________________________________ +[ 8198] By: jhi on 2000/12/19 18:29:59 + Log: Regen Configure, nitfix uconfig.sh (d_vendorarch is needed). + Branch: perl + ! Configure Porting/Glossary Porting/config.sh Porting/config_H + ! config_h.SH pod/perltoc.pod uconfig.h uconfig.sh +____________________________________________________________________________ +[ 8197] By: jhi on 2000/12/19 17:55:29 + Log: In VMS embedded perls couldn't access the statically built Socket, + from Charles Lane. + Branch: perl + ! configure.com +____________________________________________________________________________ +[ 8196] By: jhi on 2000/12/19 17:49:50 + Log: Subject: [PATCH perl@8143] DB_File-1.75 (was RE: [8104] DB_File) + From: "Paul Marquess" + Date: Sun, 17 Dec 2000 19:11:44 -0000 + Message-ID: <000801c0685d$3224e5a0$a20a140a@bfs.phone.com> + Branch: perl + ! ext/DB_File/Changes ext/DB_File/DB_File.pm + ! ext/DB_File/DB_File.xs ext/DB_File/dbinfo +____________________________________________________________________________ +[ 8195] By: jhi on 2000/12/19 17:47:53 + Log: Subject: [patch perl@8150] h2xs SYNOPSIS + From: Jonathan Stowe + Date: Mon, 18 Dec 2000 10:24:38 +0000 (GMT) + Message-ID: + Branch: perl + ! utils/h2xs.PL +____________________________________________________________________________ +[ 8194] By: jhi on 2000/12/19 17:46:28 + Log: Subject: Re: useless use of void context work-around + From: andreas.koenig@anima.de (Andreas J. Koenig) + Date: 16 Dec 2000 15:13:36 +0100 + Message-ID: + + Document (comment) the q(di ds ig) trick in the code. + Branch: perl + ! op.c +____________________________________________________________________________ +[ 8193] By: jhi on 2000/12/19 17:10:57 + Log: Subject: [ID 20001215.004] Sys::Syslog::xlate doesn't handle LOG_EMERG + From: "Mark J. Reed" + Date: Fri, 15 Dec 2000 21:22:29 -0500 (EST) + Message-Id: <200012160222.VAA13986@strange.turner.com> + Branch: perl + ! ext/Sys/Syslog/Syslog.pm +____________________________________________________________________________ +[ 8192] By: jhi on 2000/12/19 17:07:45 + Log: Subject: [PATCH] Re: [PATCH] strtoq, strtou(q|ll|l) testing + From: Nicholas Clark + Date: Sat, 16 Dec 2000 19:03:13 +0000 + Message-ID: <20001216190313.D68304@plum.flirble.org> + Branch: perl + ! t/op/64bitint.t +____________________________________________________________________________ +[ 8191] By: jhi on 2000/12/19 17:06:13 + Log: Subject: [ID 20001218.005] Not OK: perl v5.7.0 +DEVEL8148 on powerpc-machten 4.1.4 + From: Dominic Dunlop + Date: Mon, 18 Dec 2000 12:00:15 +0100 + Message-Id: + + This patchlet is needed in order that perl can be statically linked. + Branch: perl + ! regexec.c +____________________________________________________________________________ +[ 8190] By: jhi on 2000/12/19 17:03:08 + Log: Subject: [PATCH perl@8133] finding PerlIO symbols for VMS + From: "Craig A. Berry" + Date: Sun, 17 Dec 2000 00:18:35 -0600 + Message-Id: + Branch: perl + ! perlio.h vms/gen_shrfls.pl +____________________________________________________________________________ +[ 8189] By: jhi on 2000/12/19 16:20:28 + Log: Subject: [DOC PATCH: perl@8150, 5.6.1-TRIAL1] update list of lang. sensitive editors/IDES + From: Prymmer/Kahn + Date: Tue, 19 Dec 2000 08:08:31 -0800 (PST) + Message-ID: + + A better version of #8188. + Branch: perl + ! pod/perlfaq3.pod +____________________________________________________________________________ +[ 8188] By: jhi on 2000/12/19 15:57:06 + Log: (Replaced by #8189) + + Subject: [DOC PATCH: perl@7953] update list of lang. sensitive editors/IDES + Date: Mon, 18 Dec 2000 08:03:34 -0800 (PST) + From: Prymmer/Kahn + Message-ID: + Subject: Re: [DOC PATCH: perl@7953] update list of lang. sensitive editors/IDES + From: Ronald J Kimball + Date: Mon, 18 Dec 2000 11:10:45 -0500 + Message-ID: <20001218111044.B180222@linguist.thayer.dartmouth.edu> + Branch: perl + ! pod/perlfaq3.pod +____________________________________________________________________________ +[ 8187] By: jhi on 2000/12/19 15:54:19 + Log: Email address fix for Anton Berezin. + Branch: perl + ! AUTHORS +____________________________________________________________________________ +[ 8186] By: jhi on 2000/12/19 15:38:54 + Log: Subject: [PATCH perl@8102] cygwin port + From: "Eric Fifer" + Date: Thu, 14 Dec 2000 13:41:29 -0000 + Message-Id: <200012141340.NAA54236@mailhost1.dircon.co.uk> + + When compiling modules the data item that is being imported + from libperl.dll needs to be tagged as imported/shared data: + extern __declspec(dllimport) PerlIO_funcs PerlIO_pending; + Branch: perl + ! perliol.h +____________________________________________________________________________ +[ 8185] By: jhi on 2000/12/19 14:53:24 + Log: Regen uconfig.h and uconfig.sh. + Branch: perl + ! uconfig.h uconfig.sh +____________________________________________________________________________ +[ 8184] By: jhi on 2000/12/18 20:43:49 + Log: Comments work so much better when they are closed. + Branch: perl + ! regcomp.c +____________________________________________________________________________ +[ 8183] By: jhi on 2000/12/18 18:04:02 + Log: Some compilers (e.g. HP-UX) can't switch on 64-bit integers. + Fixes the bug 20001218.016. + Branch: perl + ! regcomp.c +____________________________________________________________________________ +[ 8182] By: gsar on 2000/12/18 09:53:47 + Log: delete spurious files + Branch: maint-5.6/perl + - lib/CGI/eg/make_links.pl lib/CGI/eg/wilogo.gif vos/config.def + - vos/config.h vos/config_h.SH_orig +____________________________________________________________________________ +[ 8181] By: gsar on 2000/12/18 09:46:08 + Log: regen perltoc + Branch: maint-5.6/perl + ! pod/buildtoc.PL pod/perl.pod pod/perlapi.pod pod/perltoc.pod +____________________________________________________________________________ +[ 8180] By: gsar on 2000/12/18 09:20:27 + Log: integrate changes#7924..7926,7946,7952 from mainline + Branch: maint-5.6/perl + !> lib/CPAN.pm lib/CPAN/FirstTime.pm lib/ExtUtils/MM_Unix.pm + !> lib/File/stat.pm t/lib/class-struct.t +____________________________________________________________________________ +[ 8179] By: gsar on 2000/12/18 08:55:54 + Log: integrate changes#7889,7890,7900,7903,7904,7907,7910,7917, + 7918,7919,7988,8907 from mainline (various) + Branch: maint-5.6/perl + +> t/lib/class-struct.t + !> MANIFEST README.amiga ext/Sys/Syslog/Syslog.pm gv.c + !> lib/Class/Struct.pm pod/perlipc.pod pod/perltie.pod + !> t/lib/syslfs.t t/op/lfs.t utils/perlcc.PL +____________________________________________________________________________ +[ 8178] By: gsar on 2000/12/18 08:16:30 + Log: avoid redefinition warnings on windows due to sys/socket.h getting + #included before win32.h + Branch: maint-5.6/perl + ! win32/include/sys/socket.h +____________________________________________________________________________ +[ 8177] By: gsar on 2000/12/18 05:24:04 + Log: make regen_headers; fix POSIX.xs problems; remove outdated + code from sys/socket.h that makes build fail now + Branch: maint-5.6/perl + ! ext/POSIX/POSIX.xs global.sym objXSUB.h perlapi.c + ! pod/perlapi.pod + !> win32/include/sys/socket.h +____________________________________________________________________________ +[ 8176] By: gsar on 2000/12/18 05:20:17 + Log: update Changes + Branch: maint-5.6/perl + ! Changes patchlevel.h +____________________________________________________________________________ +[ 8175] By: gsar on 2000/12/18 04:57:48 + Log: integrate changes#7643,7646..7649,7651..7654,7658,7659, + 7661..7665,7667..7669,7671,7673,7676,7677,7681..7683, + 7689..7697,7699..7701,7703,7705,7714,7715,7718..7723, + 7725,7726,7729..7732,7737,7748,7749,7758,7759,7761,7773, + 7775,7776,7782,7785..7787,7804,7807,7808,7810,7811,7816, + 7823,7825,7838 + Branch: maint-5.6/perl + +> lib/File/Spec/Epoc.pm + !> (integrate 88 files) +____________________________________________________________________________ +[ 8174] By: gsar on 2000/12/18 03:53:09 + Log: integrate changes#7602,7604..7611,7614,7616..7619,7621..7623, + 7625..7629,7631..7634,7637,7639,7642 from mainline + Branch: maint-5.6/perl + +> README.solaris + !> (integrate 26 files) +____________________________________________________________________________ +[ 8173] By: gsar on 2000/12/18 03:37:02 + Log: integrate changes#7472,7474..7479,7481,7485,7489,7493,7494,7496, + 7497,7499..7503,7505..7507,7509..7513,7515..7523,7526..7534, + 7536,7540,7542,7544..7546,7549,7553,7556,7557,7559,7561..7563, + 7565,7568..7572,7576,7578..7589,9592..7594,7596..7601 from mainline + Branch: maint-5.6/perl + +> t/lib/tie-refhash.t t/lib/tie-substrhash.t + - MAINTAIN + !> (integrate 111 files) +____________________________________________________________________________ +[ 8172] By: jhi on 2000/12/18 02:49:27 + Log: Regen pods. + Branch: perl + ! pod/perlmodlib.pod pod/perltoc.pod +____________________________________________________________________________ +[ 8171] By: gsar on 2000/12/18 02:49:24 + Log: integrate changes#7447,7448,7450,7454,7456,7457,7460,7462, + 7465..7471 from mainline + + Remains of the old UTF-8 API, utf8_to_uv_chk(): didn't link + in platforms that strictly require all the symbols being present + at link time. + + Subject: [PATCH: perl@7446] restore missing d_stdio_cnt_lval to VMS + + Subject: [ID 20001025.011] [PATCH] t/io/open.t perl@7369[ 7350] breaks VMS perl + + Subject: [ID 20001026.006] C gives uninitialized warning + + Subject: [PATCH] todo + + Subject: [ID 20001027.002] Patch 7380 followup - Perl_modfl *must* be defined + + Use $sort, $uniq (and $tr) consistently as wondered + by Nicholas Clark. + + Too enthusiastic editing in #7460. + + The reëntrant version shouldn't be needed unless USE_PURE_BISON. + + Upgrade to CPAN 1.58_55. + Subject: CPAN.pm status + + Subject: [ID 20001027.005] Nit in perlos2.pod - space needs deleted on line 118 + + Make target reordering to avoid pointless re-makes. + Subject: Re: Total re-make of 'make okfile' after 7451 ? + + Subject: [ID 20001027.010] [PATCH] Add info on building CPAN modules to README.dos + + Subject: DOC PATCH 5.6.0 + + Add the repository doc by Malcolm, Sarathy, and by Simon, + name as suggested by Michael Bletzinger . + Branch: maint-5.6/perl + +> Porting/repository.pod + !> Configure MANIFEST Makefile.SH README.dos README.os2 + !> config_h.SH configure.com embed.h embed.pl handy.h lib/CPAN.pm + !> lib/CPAN/FirstTime.pm perl.h pod/perlfunc.pod pod/perltodo.pod + !> pp.c proto.h t/io/open.t t/op/assignwarn.t toke.c +____________________________________________________________________________ +[ 8169] By: gsar on 2000/12/18 02:33:34 + Log: integrate changes#7416,7417,7420..7422,7424,7426..7429,7431..7433, + 7435..7441,7445 from mainline + + Make the UTF-8 decoding stricter and more verbose when + malformation happens. This involved adding an argument + to utf8_to_uv_chk(), which involved changing its prototype, + and prefer STRLEN over I32 for the UTF-8 length, which as + a domino effect necessitated changing the prototypes of + scan_bin(), scan_oct(), scan_hex(), and reg_uni(). + The stricter UTF-8 decoding checking uses Markus Kuhn's + UTF-8 Decode Stress Tester from + http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt + + Run vms/vms_yfix.pl, should have done that after changing + perly.c in #7382. + + Subject: [PATCH 5.7.0] static linking with uninstalled perl + + (Replaced by #7440.) + Subject: Re: [ID 20001022.001] Not OK: perl v5.7.0 +DEVEL7368 on i686-linux 2.2.16 + + Fix the bug ID 20001024.005, the bug introduced by #7416. + + Subject: Re: [ID 20001023.003] PATCH perlfaq5 [perl-current] + + Fix the bug reported in + From: andreas.koenig@anima.de (Andreas J. Koenig) + Also make is_utf8_char() stricter. + + Missed the header file changes from #7425. + + Check if stdio supports tweaking lval and cnt simultaneously. + Subject: PATCH (Re: PerlIO - Configure tweak for Linux/glibc?) + + Stratus VOS updates from Paul Green. + + Podify README.epoc and README.vos. + + Add targets to Makefile.SH, most importantly + 'regen_all' which also remembers to update vms/perly*. + + Subject: Minor update to find2perl, for portability + + Subject: patch 7416 breaks sv.c on AIX and HP-UX (patch included) + + Subject: [ID 20001024.007] [PATCH] "Dump local *FH" causes SEGV + + Rename UTF8LEN() to be UNISKIP(), too confusing to have + UTF8LEN() and UTF8SKIP(). + + Allow poking holes at the UTF-8 decoding strictness. + + Continue the internal UTF-8 API tweaking. + Rename utf8_to_uv_chk() back to utf8_to_uv() because it's + used much more than the simpler API, now called utf8_to_uv_simple(). + Still not quite happy with API, too much partial duplication + of functionality. + + A new version of making the syslog test more robust. + (Replaces #7421.) + Subject: Re: [ID 20001022.001] Not OK: perl v5.7.0 +DEVEL7368 on i686-linux 2.2.16 + + buildtoc target tweaks. + + Integrate with vmsperl #7430 by Charles Bailey: + + Cleanup from prior patch (Charles Lane?): + - improve handling of MFDs in Basename and Path + - default to no xsubpp line # munging when building debug images + Branch: maint-5.6/perl + +> vos/config.alpha.def vos/config.alpha.h vos/config.ga.def + +> vos/config.ga.h vos/configure_perl.cm vos/install_perl.cm + !> (integrate 67 files) +____________________________________________________________________________ +[ 8168] By: gsar on 2000/12/18 02:05:49 + Log: integrate changes#7512,7733 from mainline (regex bugfixes) + + Subject: [ID 20001031.004] Uninitialized auto variable in regcomp.c + From: Martin Husemann + + Subject: [PATCH 5.7.0] restore match data on backtracing + From: Ilya Zakharevich + Branch: maint-5.6/perl + !> regcomp.c regexec.c t/op/re_tests +____________________________________________________________________________ +[ 8167] By: gsar on 2000/12/18 01:55:22 + Log: integrate changes#7858,7986 from mainline + + C in pseudo-fork()ed process may diddle + parent's memory; fix it by keeping track of the actual pad + offset rather than a raw pointer (this change is probably also + relevant to non-ithreads case to avoid fallout from reallocs of + the pad array, but is currently only enabled for the ithreads + case in the interests of minimal disruption to existing "well + tested" code) + + fix open(FOO, ">&MYSOCK") failure under Windows 9x (problem is + due to the notorious GetFileType() bug in Windows 9x, which fstat() + tickles) + Branch: maint-5.6/perl + !> embed.h embed.pl global.sym objXSUB.h perlapi.c pp_ctl.c + !> proto.h scope.c scope.h sv.c t/op/fork.t win32/perlhost.h + !> win32/win32.c win32/win32.h win32/win32sck.c +____________________________________________________________________________ +[ 8166] By: gsar on 2000/12/18 01:52:59 + Log: integrate changes#7626,7632,7717,7738,7814,7817,7902,7912,7915 + from mainline (xsubpp and ExtUtils::LibList fixups, various + other small items) + Branch: maint-5.6/perl + !> emacs/cperl-mode.el emacs/ptags lib/ExtUtils/Liblist.pm + !> lib/ExtUtils/MM_VMS.pm lib/ExtUtils/MakeMaker.pm + !> lib/ExtUtils/xsubpp lib/unicode/syllables.txt minimod.pl + !> pod/perlfunc.pod pod/perlxs.pod pod/perlxstut.pod t/op/split.t + !> win32/bin/search.pl +____________________________________________________________________________ +[ 8165] By: gsar on 2000/12/18 01:28:45 + Log: integrate changes#7533,7563,7611,7623 from mainline (various + malloc.c embellishments) + Branch: maint-5.6/perl + !> malloc.c pod/perldiag.pod +____________________________________________________________________________ +[ 8164] By: gsar on 2000/12/18 01:23:33 + Log: integrate changes#7419,7806,8129 from mainline (various h2xs + fixups) + Branch: maint-5.6/perl + !> utils/h2xs.PL +____________________________________________________________________________ +[ 8163] By: gsar on 2000/12/18 01:17:50 + Log: integrate changes#7493,7599,7803 from mainline (various perlbug + fixups) + Branch: maint-5.6/perl + !> Makefile.SH utils/perlbug.PL +____________________________________________________________________________ +[ 8162] By: gsar on 2000/12/18 00:25:43 + Log: always export Perl_deb() (it is required by re.xs whether + Perl is built with or without -DDEBUGGING) + Branch: maint-5.6/perl + ! makedef.pl +____________________________________________________________________________ +[ 8161] By: gsar on 2000/12/18 00:23:38 + Log: integrate change#7414 from mainline + + Undo the basename() part of #7412 since the lib/basename + tests would need upgrading too. + + squelch two tests in tr.t that rely on tr/// paranoia change + that's not in 5.6.x + Branch: maint-5.6/perl + ! t/op/tr.t + !> lib/File/Basename.pm +____________________________________________________________________________ +[ 8160] By: gsar on 2000/12/18 00:05:30 + Log: missing change in previous integrate + Branch: maint-5.6/perl + !> README.aix +____________________________________________________________________________ +[ 8159] By: gsar on 2000/12/18 00:03:38 + Log: integrate changes#7205..7210,7212,7214..7219,7222,7223,7225,7226, + 7228,7230..7241,7243,7346,7347,7350..7354,7356,7358..7360,7362, + 7363,7365..7368,7370..7374,7376..7386,7391,7393..7399,7304..7408, + 7410..7413 from mainline + Branch: maint-5.6/perl + +> README.aix hints/nonstopux.sh lib/unicode/Is/DCmedial.pl + +> t/lib/tie-splice.t + - lib/unicode/Is/DCinital.pl + !> (integrate 112 files) +____________________________________________________________________________ +[ 8158] By: jhi on 2000/12/17 23:04:24 + Log: Subject: [PATCHES] RE: perl@8150 + From: "Gerrit P. Haase" + Date: Sun, 17 Dec 2000 21:46:39 +0100 + Message-ID: <3A3D343F.13566.1ACA7D93@localhost> + + Neither cygwin has a getpwuid() one can trust on. + Branch: perl + ! t/lib/glob-basic.t +____________________________________________________________________________ +[ 8157] By: jhi on 2000/12/17 23:01:54 + Log: More MAN.PODS => {} fixes. + Branch: perl + ! os2/OS2/ExtAttr/Makefile.PL os2/OS2/PrfDB/Makefile.PL + ! os2/OS2/Process/Makefile.PL os2/OS2/REXX/DLL/Makefile.PL + ! os2/OS2/REXX/Makefile.PL +____________________________________________________________________________ +[ 8156] By: gsar on 2000/12/17 22:49:13 + Log: integrate changes#7069..7077,7079,7081..7087,7090,7092,7093, + 7096..7104,7109..7117,7119..7124,7126,7128,7129,7133,7134, + 7136..7139,7141..7146,7148,7149,7151,7153..7155,7157,7158, + 7160,7161,7164,7165,7169..7178,7180..7191,7193..7197,7199, + 7201,7204 from mainline + Branch: maint-5.6/perl + !> (integrate 121 files) +____________________________________________________________________________ +[ 8155] By: jhi on 2000/12/17 22:30:58 + Log: Subject: [PATCH perl@8133] fix-up for VMS extensions + From: "Craig A. Berry" + Date: Sun, 17 Dec 2000 13:09:28 -0600 + Message-Id: + + MAN.PODS => ' ' is naughty. + Branch: perl + ! vms/ext/DCLsym/Makefile.PL vms/ext/Stdio/Makefile.PL +____________________________________________________________________________ +[ 8154] By: nick on 2000/12/17 22:07:13 + Log: MULTIPLICITY nit. + Branch: perl + ! mg.c +____________________________________________________________________________ +[ 8153] By: gsar on 2000/12/17 21:23:05 + Log: integrate changes#7017..7019,7021..7025,7027..7036,7038,7039, + 7041..7044,7046..7048,7050..7061,7063,7066..7067,7069..7074 + from mainline + + Document the SvIOK_.*UV(). + + Update Unicode todo list. + + Guard against bad string->int conversion for quads. + + Subject: small apidoc fix + + Subject: [PATCH] Tie::StdHandle did not know about 3-arg open + + Subject: [PATCH] Tied filehandle documentation + + Subject: [PATCH] Modernize Opcode.pm documentation + + Make Data::Dumper (non-XS) to work with changed semantics of ref(). + Subject: Re: Undocumented(?) change to "ref" semantics in 5.7.0 + [applied even though said semantics didn't change in 5.6.x] + + Subject: [PATCH@7014] \G in non-/g is well-defined now ... right? + + Subject: Re: [ID 20000905.001] Assertion failed: file "toke.c", line 202 + + Fix the URL, but the server is still missing in action. + Subject: [ID 20000905.002] perlfaq1.pod URL error + + Subject: [ID 20000903.001] \w in utf8-strings + + Fix the ccversion detection for 5.1 and beyond. + Subject: [ID 20000907.007] Not OK: perl v5.7.0 +devel-7030 on alpha-dec_osf 4.0f + + Subject: [PATCH 5.7.0] perl5db.pl [Was: Re: Debugger question] + + Subject: [ID 20000904.008] Tiny fix for perldiag + + Subject: Re: [ID 20000906.004] segfault with bad perl statement + + Subject: Re: [ID 20000907.007] Not OK: perl v5.7.0 +devel-7030 on alpha-dec_osf 4.0f + + Subject: [ID 20000908.002] perlipc documentation bug. + + Subject: [PATCH lib/Benchmark.pm] + + Re-allow vec() for characters > 255. + Subject: [PATCH] Re: [ID 20000907.005] Not OK: perl v5.7.0 +devel-7030 on alpha-dec_osf-perlio 4.0f (UNINSTALLED) + + Do away with memory models cruft. Sorry, PDP users. + + Continue #7041. + + Subject: [PATCH (or RFC): 5.7.0] make the ran_tests intermediate file 8.3 friendly + + Subject: [PATCH: 5.7.0] proper setting for isnan for DECC 5.3 + + Upgrade to CPAN 1.57_65, from Andreas König. + + Upgrade to podlators-1.03 (Pod::Man 1.07 and Pod::Text 2.05), + by Russ Allbery. + + Silence t/pod/*.t about alternate quote-mappings now implemented + by Pod::Text, from Brad Appleton. + + Modern Borland C now seems to have anon unions for info.wProcessorArchitecture + Subject: borland C++ win32.c tweak + + C<@a = @b = split(...)> optimization coredumps under ithreads + (missed a spot when fixing up op_pmreplroot hack for ithreads) + + Document the SvUTF8*(). + + Subject: [PATCH] Perl 5.6.0, 5.7.0 ... vms/test.com to eliminate spurious NL's in test output + + Subject: RE: [Patch 5.7.0] Removing -ldb from the core build + + Do in VMS as the #7054 does. + + Subject: [patch] perlfunc.pod -- POSIX::sigpause should be POSIX::pause + + Subject: [ID 20000911.008] Not OK: perl v5.7.0 +DEVEL7048 on os2-64int-ld 2.30 (UNINSTALLED) + + Subject: [patch: perl@7045] vms updates + + Test for the #7049. + Subject: Re: [PATCH] Re: [ID 20000910.001] Not OK: perl v5.7.0 +DEVEL7044 on i686-linux 2.2.16-raid (UNINSTALLED) + + Break up the myconfig lines a bit. + Subject: perlbug/perl -V output format + + Subject: [ID 20000911.011] misplaced typemap in perlxs.pod + + The #7054 truncated Configure badly. + + change#6327 didn't quite go all the way to enable USE_SOCKETS_AS_HANDLES + initialization in all the threads on Windows + + Allow for whitespace between "#" and "line" in cpp output. + Subject: [PATCH] Re: Problems compiling bleadperl on Unicos 9 + + Remove vestiges of tr//CU. + Subject: [ID 20000912.009] perlunicode.pod still mentions tr///CU + + The return value of setlocale must be copied away. + Subject: [ID 20000913.001] Heap corruption in Perl_init_i18nl10n + + Allow chop() and chomp() to be overridden. + Subject: [PATCH] Re: [ID 20000911.006] I can override glob but not chop? + + Hints optimization. + Subject: Minor nit + + Subject: [PATCH] de-wall t/README + + Subject: Re: Two advertising clauses need to be removed + Branch: maint-5.6/perl + !> (integrate 75 files) +____________________________________________________________________________ +[ 8152] By: gsar on 2000/12/17 20:30:11 + Log: integrate changes#6945,6947,6949..6954,6956,6958,6959,6961, + 6964..6972,6977..6981..6984,6987,6988,6991,6994,6997, + 6999..7001,7003..7005,7007,7009,7011,7012 from mainline + + Don't attach -ld to the archname if pointless. + + Document UNTIE in a very minimalistic way. + + POSIX doesn't report long double values under -Duselongdouble + when the long doubles are "real" (bigger than doubles). + + More author updates. + + Try to deduce NV_MAX. Really should be Configure fodder. + + :: not allowed in pathnames, change to . + Subject: [PATCH perl@6938] cygwin port + + Forget about NV_MAX (#6951). Various floating point tweaks, + ideas from Eric Fifer, Yitzchak, Alan, and Spider. + + Move the Solaris 7 scan to use64bitall, make the + failure to find 64-bot sparc libc to mention the + possibility of being in an intel, from Lupe and Alan. + + Regen perltoc. + + AUTHORS tweaks, from Peter Prymmer. + + More address tweaking. + + Small tweaks all over. + + File::Temp patches from Andreas König, + + Subject: [PATCH perl@6962] 2 more vms.c fix-ups and status + + Subject: CPAN.pm beta 1.57_57 for the core + + Part of the solution. + Subject: Re: [ID 20000807.004] [PATCH] conditional breakpoints leak memory + + Subject: [PATCH@6961] Fix misleading example in perlretut.pod + + Subject: [PATCH lib/overload.pm] Sanaty checking of arguments to overload::constant + + Add the overload warnings to perldiag. + + Drop unused argument. + Subject: Re: [ID 20000831.034] overload::constant and number of arguments. + + Subject: Nit in Configure (bleadperl@6961) + + Update to PodParser 1.18, from Brad Appleton. + + Subject: [ID 20000901.017] [PATCH] Basic test failure in an untidy world + + Subject: [PATCH: 6948] add SCNfldbl to configure.com + + Document UNTIE. Also tweak implementation to suppress the 'inner references' + warning when UNTIE exists and instead pass the cound of extra references to + the UNTIE method. + + Rename the PRIElfbl, PRIX64, etc, to be PRIEUfldbl, PRIXU64, + so that case-ignoring systems like DCL can tell them from + PRIefldbl and PRIx64. Apply Merijn's ccversion patches. + + Subject: Re: [PATCH lib/overload.pm] Sanaty checking of arguments to overload::constant + + Feature ordering tweak. + + Regen perltoc. + + Subject: [PATCH] Fix vec() / utf8 (was Re: bitvec ops still broken with utf8 -- or not?) + + Subject: Re: [PATCH perl@6962] 2 more vms.c fix-ups and status + + Subject: http:// in L<> + + Detypo. + + change#6791 accidentally clobbered change#6710, put it back + + Only the first line, thank you very much. + + Subject: [PATCH: 6996] minimal removal of 8 bit chrs from perlebcdic.pod + plus rework the http: spots as suggested by Tom Christiansen, + plus regen perltoc. + + Undo part of change 6489 which looks like a bulk edit which + changed _all_ gv_efullname3() calls to gv_efullname4() calls. + The supressing of main:: on return from select() is undesirable. + + Apparently avoiding the swapping is too costly. + + Various Configure nits by Philip Newton, + plus the ebcdic one by me. + + Make certain cc is set before trying to run it. + + If overloaded %{} etc. return the object do not loop. + Thus sub deref { $_[0] } functions if object is wanted type. + + Update perlhist. + + More %{} and other deref special casing - do not pass to 'nomethod'. + Branch: maint-5.6/perl + !> (integrate 59 files) +____________________________________________________________________________ +[ 8151] By: gsar on 2000/12/17 19:14:38 + Log: integrate changes#6903,6905..6907,6909,6911..6913,6915,6917,6918, + 6920..6926,6928..6930,6934..6937,6939,6940,6942..6944 from mainline + + Subject: [PATCH perl@6889] Chuck Lane's OpenVMS piping improvements + + Make the epsilon to be relative, not absolute. + + Put back the flags dump as reasoned in + Subject: Re: [PATCH] Glob dumping + + Introduce ccname to keep track of what compiler kind of we have. + + Subject: Re: [ID 20000829.020] perl -e 'package; print __PACKAGE__' core dumps + + Put back the slice accidentally removed by #6907. + + Reset archname and archname64 always, forcing them be + recomputed at each Configure run, make Configure and + the hints files agree on the naming of largefiles variables. + + Don't say "Perl 5.0 source kit". + + Subject: [PATCH] fix misc cast warnings + + Subject: typos in pods + + NVs not necessarily doubles, as pointed out by Yitzchak. + + Subject: [PATCH 6889] add a few ldbl formats to configure.com + + Subject: [ID 20000830.036] [DOC] chom?p %hash not documented + + Better options for rsync. + + Subject: [PATCH perl@6889] fix Storable on VMS by fixing my_fwrite() + + Subject: Re: not OK, 6919 on Alpha VMS V 7.1 w/ DECC 6.0-001 + + Subject: [PATCH] Re: UNTIE method + + A better fix for the Socket building problem from Craig Berry. + + Retract the dummy test, skip the security tests (instead of failing), + explain what the warnings mean. + + Heap decorruption. + Subject: [PATCH] Fix for miniperl coredump on Solaris with -Duselongdouble + + Update to Unicode 3.0.1. + + Missed one Unicode file. + + Subject: Re: typos in pods + + The #6929 was too skimpy. + + sscanf() may be the only way to read long doubles from strings. + + Reveal Borland's isnan. + Subject: build with BC++ tweak + + Issue useful diagnostic on unknown pod commands. + Subject: [PATCH lib/Pod/Man.pm] Re: [ID 20000830.048] + + Subject: [PATCH] Re: [ID 20000830.048] Not OK: perl v5.7.0 +DEVEL6938 on i686-linux 2.2.13 + + Clarify the third case of ftmp-security warnings. + + Make -Dusemorebits find long doubles in Solaris. + + Wrap the test in eval. + Branch: maint-5.6/perl + +> lib/unicode/BidiMirr.txt lib/unicode/CaseFold.txt + +> lib/unicode/PropList.txt lib/unicode/README.perl + +> lib/unicode/UCD301.html lib/unicode/UCDFF301.html + +> lib/unicode/Unicode.301 vms/vmspipe.com + - lib/unicode/Props.txt lib/unicode/UCD300.html + - lib/unicode/Unicode.300 lib/unicode/Unicode3.html + !> (integrate 305 files) +____________________________________________________________________________ +[ 8150] By: jhi on 2000/12/17 18:47:57 + Log: Uncheckedin generated files. + Branch: perl + ! global.sym perlapi.c pod/perlapi.pod +____________________________________________________________________________ +[ 8149] By: jhi on 2000/12/17 18:41:22 + Log: Update Changes. + Branch: perl + ! Changes patchlevel.h +____________________________________________________________________________ +[ 8148] By: jhi on 2000/12/17 18:39:16 + Log: Subject: [PATCH] Fcntl constants speedup + From: Nicholas Clark + Date: Sun, 17 Dec 2000 16:29:24 +0000 + Message-ID: <20001217162924.E97668@plum.flirble.org> + + Use IVs for the Fcntl constants instead of NVs. + Branch: perl + ! ext/Fcntl/Fcntl.pm ext/Fcntl/Fcntl.xs t/op/goto_xs.t +____________________________________________________________________________ +[ 8147] By: jhi on 2000/12/17 18:33:41 + Log: Add test for #8145 (binmode() warning), add warning for + ioctl() and sockpair(), document them. (fileno() cannot + be tripwired with the same kind of warning because + 'defined fileno($foo)' seems to be an idiom.) + Branch: perl + ! pod/perldiag.pod pp_sys.c t/pragma/warn/pp_sys +____________________________________________________________________________ +[ 8146] By: gsar on 2000/12/17 18:09:08 + Log: update Changes + Branch: maint-5.6/perl + ! Changes +____________________________________________________________________________ +[ 8145] By: jhi on 2000/12/17 17:39:35 + Log: Subject: [PATCH] Re: The long awaited feature ... + From: Simon Cozens + Date: Sun, 17 Dec 2000 12:31:56 +0000 + Message-ID: <20001217123156.A3891@deep-dark-truthful-mirror.perlhacker.org> + + Add a warning to binmode() about using bad filehandles + (can happen e.g. if someone forgets the filehandle argument) + Branch: perl + ! pp_sys.c +____________________________________________________________________________ +[ 8144] By: jhi on 2000/12/17 17:33:48 + Log: Subject: [patch perl@8133] Typo in my Net::Ping doc patch :( + From: Jonathan Stowe + Date: Sun, 17 Dec 2000 17:08:10 +0000 (GMT) + Message-ID: + Branch: perl + ! lib/Net/Ping.pm +____________________________________________________________________________ +[ 8143] By: jhi on 2000/12/17 05:31:37 + Log: Polymorphic regexps. + + Fixes at least the bugs 20001028.003 (both of them...) and + 20001108.001. The bugs 20001114.001 and 20001205.014 seem + also to be fixed by now, probably already before this patch. + Branch: perl + ! embed.h embed.pl mg.c objXSUB.h pp_ctl.c pp_hot.c proto.h + ! regcomp.c regcomp.h regcomp.sym regexec.c regnodes.h sv.c + ! t/op/utf8decode.t t/pragma/utf8.t +____________________________________________________________________________ +[ 8142] By: jhi on 2000/12/16 17:16:05 + Log: Subject: [patch perl@8102] dos/djgpp update + From: Laszlo Molnar + Date: Sat, 16 Dec 2000 01:40:52 +0100 + Message-ID: <20001216014052.A335@freemail.hu> + Branch: perl + ! djgpp/config.over t/base/commonsense.t +____________________________________________________________________________ +[ 8141] By: jhi on 2000/12/16 17:09:27 + Log: Few uncheckedin files. + Branch: perl + ! global.sym perlapi.c pod/perlapi.pod pod/perlintern.pod +____________________________________________________________________________ +[ 8140] By: nick on 2000/12/15 22:14:31 + Log: Integrate mainline + Branch: perlio + !> (integrate 53 files) +____________________________________________________________________________ +[ 8139] By: jhi on 2000/12/15 19:49:49 + Log: One more IVUV tweak from Nicholas Clark. + Branch: perl + ! sv.c +____________________________________________________________________________ +[ 8138] By: jhi on 2000/12/15 19:17:06 + Log: Return of the IVUV-preservation, now seems to be happy even + in Digital UNIX (the broken strtoul brokenness detection + seems to have been the fly in the ointment). + Branch: perl + ! Configure Porting/Glossary Porting/config.sh Porting/config_H + ! config_h.SH configure.com embed.h embed.pl epoc/config.sh + ! objXSUB.h op.c perl.h pp.c pp_hot.c proto.h sv.c sv.h + ! t/lib/peek.t t/op/cmp.t t/op/numconvert.t uconfig.h + ! vos/config.alpha.def vos/config.alpha.h vos/config.ga.def + ! vos/config.ga.h win32/config.bc win32/config.gc + ! win32/config.vc +____________________________________________________________________________ +[ 8137] By: jhi on 2000/12/15 18:12:14 + Log: Metaconfig unit change for #8136. + Branch: metaconfig + ! U/modified/d_strtoul.U + Branch: metaconfig/U/perl + ! d_strtoull.U d_strtouq.U +____________________________________________________________________________ +[ 8136] By: jhi on 2000/12/15 18:11:35 + Log: I don't think it's sensible or portable to test the strtou* + on /^-/ strings. + Branch: perl + ! Configure +____________________________________________________________________________ +[ 8135] By: jhi on 2000/12/15 17:18:49 + Log: Metaconfig unit change for #8134. + Branch: metaconfig + ! U/modified/d_strtoul.U +____________________________________________________________________________ +[ 8134] By: jhi on 2000/12/15 17:14:13 + Log: If longsize is 8 we don't need a LL suffix for integer constants. + Branch: perl + ! Configure config_h.SH +____________________________________________________________________________ +[ 8133] By: jhi on 2000/12/15 16:00:23 + Log: Update Changes. + Branch: perl + ! Changes patchlevel.h +____________________________________________________________________________ [ 8132] By: jhi on 2000/12/15 15:44:16 Log: Some compilers get huffy if you do not cast a const pointer to a non-const when assigning. diff --git a/Configure b/Configure index 504495c..b655e62 100755 --- a/Configure +++ b/Configure @@ -20,7 +20,7 @@ # $Id: Head.U,v 3.0.1.9 1997/02/28 15:02:09 ram Exp $ # -# Generated on Fri Dec 15 20:31:25 EET 2000 [metaconfig 3.0 PL70] +# Generated on Tue Dec 19 20:00:06 EET 2000 [metaconfig 3.0 PL70] # (with additional metaconfig patches by perlbug@perl.org) cat >c1$$ < @@ -1335,6 +1348,13 @@ */ #define HAS_FSTATFS /**/ +/* HAS_FSYNC: + * This symbol, if defined, indicates that the fsync routine is + * available to write a file's modified data and attributes to + * permanent storage. + */ +#define HAS_FSYNC /**/ + /* HAS_FTELLO: * This symbol, if defined, indicates that the ftello routine is * available to ftell beyond 32 bits (useful for ILP32 hosts). @@ -1488,6 +1508,17 @@ */ #define HAS_GETPROTOENT /**/ +/* HAS_GETPGRP: + * This symbol, if defined, indicates that the getpgrp routine is + * available to get the current process group. + */ +/* USE_BSD_GETPGRP: + * This symbol, if defined, indicates that getpgrp needs one + * arguments whereas USG one needs none. + */ +#define HAS_GETPGRP /**/ +/*#define USE_BSD_GETPGRP / **/ + /* HAS_GETPROTOBYNAME: * This symbol, if defined, indicates that the getprotobyname() * routine is available to look up protocols by their name. @@ -1797,6 +1828,15 @@ */ #define HAS_SANE_MEMCMP /**/ +/* HAS_SBRK_PROTO: + * This symbol, if defined, indicates that the system provides + * a prototype for the sbrk() function. Otherwise, it is up + * to the program to supply one. Good guesses are + * extern void* sbrk _((int)); + * extern void* sbrk _((size_t)); + */ +#define HAS_SBRK_PROTO /**/ + /* HAS_SEM: * This symbol, if defined, indicates that the entire sem*(2) library is * supported. @@ -1834,6 +1874,18 @@ */ #define HAS_SETPROTOENT /**/ +/* HAS_SETPGRP: + * This symbol, if defined, indicates that the setpgrp routine is + * available to set the current process group. + */ +/* USE_BSD_SETPGRP: + * This symbol, if defined, indicates that setpgrp needs two + * arguments whereas USG one needs none. See also HAS_SETPGID + * for a POSIX interface. + */ +#define HAS_SETPGRP /**/ +#define USE_BSD_SETPGRP /**/ + /* HAS_SETPROCTITLE: * This symbol, if defined, indicates that the setproctitle routine is * available to set process title. @@ -2105,6 +2157,12 @@ */ /*#define HAS_STRTOQ / **/ +/* HAS_STRTOUL: + * This symbol, if defined, indicates that the strtoul routine is + * available to provide conversion of strings to unsigned long. + */ +#define HAS_STRTOUL /**/ + /* HAS_STRTOULL: * This symbol, if defined, indicates that the strtoull routine is * available to convert strings to unsigned long longs. @@ -2595,6 +2653,17 @@ #define RD_NODATA -1 #define EOF_NONBLOCK +/* NEED_VA_COPY: + * This symbol, if defined, indicates that the system stores + * the variable argument list datatype, va_list, in a format + * that cannot be copied by simple assignment, so that some + * other means must be used when copying is required. + * As such systems vary in their provision (or non-provision) + * of copying mechanisms, handy.h defines a platform- + * independent macro, Perl_va_copy(src, dst), to do the job. + */ +/*#define NEED_VA_COPY / **/ + /* Netdb_host_t: * This symbol holds the type used for the 1st argument * to gethostbyaddr(). @@ -2944,6 +3013,12 @@ */ #define STARTPERL "#!/opt/perl/bin/perl" /**/ +/* STDCHAR: + * This symbol is defined to be the type of char used in stdio.h. + * It has the values "unsigned char" or "char". + */ +#define STDCHAR unsigned char /**/ + /* HAS_STDIO_STREAM_ARRAY: * This symbol, if defined, tells that there is an array * holding the stdio streams. @@ -3162,79 +3237,4 @@ #define PERL_XS_APIVERSION "5.7.0" #define PERL_PM_APIVERSION "5.005" -/* HAS_GETPGRP: - * This symbol, if defined, indicates that the getpgrp routine is - * available to get the current process group. - */ -/* USE_BSD_GETPGRP: - * This symbol, if defined, indicates that getpgrp needs one - * arguments whereas USG one needs none. - */ -#define HAS_GETPGRP /**/ -/*#define USE_BSD_GETPGRP / **/ - -/* HAS_SETPGRP: - * This symbol, if defined, indicates that the setpgrp routine is - * available to set the current process group. - */ -/* USE_BSD_SETPGRP: - * This symbol, if defined, indicates that setpgrp needs two - * arguments whereas USG one needs none. See also HAS_SETPGID - * for a POSIX interface. - */ -#define HAS_SETPGRP /**/ -#define USE_BSD_SETPGRP /**/ - -/* HAS_STRTOUL: - * This symbol, if defined, indicates that the strtoul routine is - * available to provide conversion of strings to unsigned long. - */ -#define HAS_STRTOUL /**/ - -/* STDCHAR: - * This symbol is defined to be the type of char used in stdio.h. - * It has the values "unsigned char" or "char". - */ -#define STDCHAR unsigned char /**/ - -/* HAS__FWALK: - * This symbol, if defined, indicates that the _fwalk system call is - * available to apply a function to all the file handles. - */ -/*#define HAS__FWALK / **/ - -/* FCNTL_CAN_LOCK: - * This symbol, if defined, indicates that fcntl() can be used - * for file locking. Normally on Unix systems this is defined. - * It may be undefined on VMS. - */ -#define FCNTL_CAN_LOCK /**/ - -/* HAS_FSYNC: - * This symbol, if defined, indicates that the fsync routine is - * available to write a file's modified data and attributes to - * permanent storage. - */ -#define HAS_FSYNC /**/ - -/* HAS_SBRK_PROTO: - * This symbol, if defined, indicates that the system provides - * a prototype for the sbrk() function. Otherwise, it is up - * to the program to supply one. Good guesses are - * extern void* sbrk _((int)); - * extern void* sbrk _((size_t)); - */ -#define HAS_SBRK_PROTO /**/ - -/* NEED_VA_COPY: - * This symbol, if defined, indicates that the system stores - * the variable argument list datatype, va_list, in a format - * that cannot be copied by simple assignment, so that some - * other means must be used when copying is required. - * As such systems vary in their provision (or non-provision) - * of copying mechanisms, handy.h defines a platform- - * independent macro, Perl_va_copy(src, dst), to do the job. - */ -/*#define NEED_VA_COPY / **/ - #endif diff --git a/config_h.SH b/config_h.SH index 8ab759d..596faf9 100644 --- a/config_h.SH +++ b/config_h.SH @@ -1216,6 +1216,12 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un #define CPPRUN "$cpprun" #define CPPLAST "$cpplast" +/* HAS__FWALK: + * This symbol, if defined, indicates that the _fwalk system call is + * available to apply a function to all the file handles. + */ +#$d__fwalk HAS__FWALK /**/ + /* HAS_ACCESS: * This manifest constant lets the C program know that the access() * system call is available to check for accessibility using real UID/GID. @@ -1313,6 +1319,13 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #$d_endsent HAS_ENDSERVENT /**/ +/* FCNTL_CAN_LOCK: + * This symbol, if defined, indicates that fcntl() can be used + * for file locking. Normally on Unix systems this is defined. + * It may be undefined on VMS. + */ +#$d_fcntl_can_lock FCNTL_CAN_LOCK /**/ + /* HAS_FD_SET: * This symbol, when defined, indicates presence of the fd_set typedef * in @@ -1355,6 +1368,13 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #$d_fstatfs HAS_FSTATFS /**/ +/* HAS_FSYNC: + * This symbol, if defined, indicates that the fsync routine is + * available to write a file's modified data and attributes to + * permanent storage. + */ +#$d_fsync HAS_FSYNC /**/ + /* HAS_FTELLO: * This symbol, if defined, indicates that the ftello routine is * available to ftell beyond 32 bits (useful for ILP32 hosts). @@ -1508,6 +1528,17 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #$d_getpent HAS_GETPROTOENT /**/ +/* HAS_GETPGRP: + * This symbol, if defined, indicates that the getpgrp routine is + * available to get the current process group. + */ +/* USE_BSD_GETPGRP: + * This symbol, if defined, indicates that getpgrp needs one + * arguments whereas USG one needs none. + */ +#$d_getpgrp HAS_GETPGRP /**/ +#$d_bsdgetpgrp USE_BSD_GETPGRP /**/ + /* HAS_GETPROTOBYNAME: * This symbol, if defined, indicates that the getprotobyname() * routine is available to look up protocols by their name. @@ -1817,6 +1848,15 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #$d_sanemcmp HAS_SANE_MEMCMP /**/ +/* HAS_SBRK_PROTO: + * This symbol, if defined, indicates that the system provides + * a prototype for the sbrk() function. Otherwise, it is up + * to the program to supply one. Good guesses are + * extern void* sbrk _((int)); + * extern void* sbrk _((size_t)); + */ +#$d_sbrkproto HAS_SBRK_PROTO /**/ + /* HAS_SEM: * This symbol, if defined, indicates that the entire sem*(2) library is * supported. @@ -1854,6 +1894,18 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #$d_setpent HAS_SETPROTOENT /**/ +/* HAS_SETPGRP: + * This symbol, if defined, indicates that the setpgrp routine is + * available to set the current process group. + */ +/* USE_BSD_SETPGRP: + * This symbol, if defined, indicates that setpgrp needs two + * arguments whereas USG one needs none. See also HAS_SETPGID + * for a POSIX interface. + */ +#$d_setpgrp HAS_SETPGRP /**/ +#$d_bsdsetpgrp USE_BSD_SETPGRP /**/ + /* HAS_SETPROCTITLE: * This symbol, if defined, indicates that the setproctitle routine is * available to set process title. @@ -2125,6 +2177,12 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #$d_strtoq HAS_STRTOQ /**/ +/* HAS_STRTOUL: + * This symbol, if defined, indicates that the strtoul routine is + * available to provide conversion of strings to unsigned long. + */ +#$d_strtoul HAS_STRTOUL /**/ + /* HAS_STRTOULL: * This symbol, if defined, indicates that the strtoull routine is * available to convert strings to unsigned long longs. @@ -2615,6 +2673,17 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un #define RD_NODATA $rd_nodata #$d_eofnblk EOF_NONBLOCK +/* NEED_VA_COPY: + * This symbol, if defined, indicates that the system stores + * the variable argument list datatype, va_list, in a format + * that cannot be copied by simple assignment, so that some + * other means must be used when copying is required. + * As such systems vary in their provision (or non-provision) + * of copying mechanisms, handy.h defines a platform- + * independent macro, Perl_va_copy(src, dst), to do the job. + */ +#$need_va_copy NEED_VA_COPY /**/ + /* Netdb_host_t: * This symbol holds the type used for the 1st argument * to gethostbyaddr(). @@ -2964,6 +3033,12 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #define STARTPERL "$startperl" /**/ +/* STDCHAR: + * This symbol is defined to be the type of char used in stdio.h. + * It has the values "unsigned char" or "char". + */ +#define STDCHAR $stdchar /**/ + /* HAS_STDIO_STREAM_ARRAY: * This symbol, if defined, tells that there is an array * holding the stdio streams. @@ -3182,80 +3257,5 @@ sed <$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un #define PERL_XS_APIVERSION "$xs_apiversion" #define PERL_PM_APIVERSION "$pm_apiversion" -/* HAS_GETPGRP: - * This symbol, if defined, indicates that the getpgrp routine is - * available to get the current process group. - */ -/* USE_BSD_GETPGRP: - * This symbol, if defined, indicates that getpgrp needs one - * arguments whereas USG one needs none. - */ -#$d_getpgrp HAS_GETPGRP /**/ -#$d_bsdgetpgrp USE_BSD_GETPGRP /**/ - -/* HAS_SETPGRP: - * This symbol, if defined, indicates that the setpgrp routine is - * available to set the current process group. - */ -/* USE_BSD_SETPGRP: - * This symbol, if defined, indicates that setpgrp needs two - * arguments whereas USG one needs none. See also HAS_SETPGID - * for a POSIX interface. - */ -#$d_setpgrp HAS_SETPGRP /**/ -#$d_bsdsetpgrp USE_BSD_SETPGRP /**/ - -/* HAS_STRTOUL: - * This symbol, if defined, indicates that the strtoul routine is - * available to provide conversion of strings to unsigned long. - */ -#$d_strtoul HAS_STRTOUL /**/ - -/* STDCHAR: - * This symbol is defined to be the type of char used in stdio.h. - * It has the values "unsigned char" or "char". - */ -#define STDCHAR $stdchar /**/ - -/* HAS__FWALK: - * This symbol, if defined, indicates that the _fwalk system call is - * available to apply a function to all the file handles. - */ -#$d__fwalk HAS__FWALK /**/ - -/* FCNTL_CAN_LOCK: - * This symbol, if defined, indicates that fcntl() can be used - * for file locking. Normally on Unix systems this is defined. - * It may be undefined on VMS. - */ -#$d_fcntl_can_lock FCNTL_CAN_LOCK /**/ - -/* HAS_FSYNC: - * This symbol, if defined, indicates that the fsync routine is - * available to write a file's modified data and attributes to - * permanent storage. - */ -#$d_fsync HAS_FSYNC /**/ - -/* HAS_SBRK_PROTO: - * This symbol, if defined, indicates that the system provides - * a prototype for the sbrk() function. Otherwise, it is up - * to the program to supply one. Good guesses are - * extern void* sbrk _((int)); - * extern void* sbrk _((size_t)); - */ -#$d_sbrkproto HAS_SBRK_PROTO /**/ - -/* NEED_VA_COPY: - * This symbol, if defined, indicates that the system stores - * the variable argument list datatype, va_list, in a format - * that cannot be copied by simple assignment, so that some - * other means must be used when copying is required. - * As such systems vary in their provision (or non-provision) - * of copying mechanisms, handy.h defines a platform- - * independent macro, Perl_va_copy(src, dst), to do the job. - */ -#$need_va_copy NEED_VA_COPY /**/ - #endif !GROK!THIS! diff --git a/configure.com b/configure.com index 36bf11e..e79fc98 100644 --- a/configure.com +++ b/configure.com @@ -53,6 +53,7 @@ $ use_two_pot_malloc = "N" $ use_pack_malloc = "N" $ use_debugmalloc = "N" $ ccflags = "" +$ static_ext = "" $ vms_default_directory_name = F$ENVIRONMENT("DEFAULT") $ max_allowed_dir_depth = 3 ! e.g. [A.B.PERLxxx] not [A.B.C.PERLxxx] $! max_allowed_dir_depth = 2 ! e.g. [A.PERLxxx] not [A.B.PERLxxx] @@ -2061,6 +2062,10 @@ $ ans = F$EDIT(ans,"TRIM,COMPRESS,LOWERCASE") $ IF ans.eqs."decc" then Has_Dec_C_Sockets = "T" $ IF ans.eqs."socketshr" then Has_socketshr = "T" $ ENDIF +$ IF Has_Dec_C_Sockets .or. Has_socketshr +$ THEN +$ static_ext = f$edit(static_ext+" "+"Socket","trim,compress") +$ ENDIF $! $! $! Ask if they want to build with VMS_DEBUG perl @@ -5310,7 +5315,7 @@ $ WC "spitshell='write sys$output '" $ WC "src='" + src + "'" $ WC "ssizetype='int'" $ WC "startperl=" + startperl ! This one's special--no enclosing single quotes -$ WC "static_ext='" + "'" +$ WC "static_ext='" + static_ext + "'" $ WC "stdchar='" + stdchar + "'" $ WC "stdio_base='((*fp)->_base)'" $ WC "stdio_bufsiz='((*fp)->_cnt + (*fp)->_ptr - (*fp)->_base)'" diff --git a/djgpp/config.over b/djgpp/config.over index f9c167e..1bdd8ca 100644 --- a/djgpp/config.over +++ b/djgpp/config.over @@ -35,7 +35,9 @@ repair() -e 's=File/=='\ -e 's=glob=='\ -e 's=Glob=='\ - -e 's/storable/Storable/' + -e 's/storable/Storable/'\ + -e 's/encode/Encode/'\ + -e 's=filter/util/call=Filter/Util/Call=' } static_ext=$(repair "$static_ext") extensions=$(repair "$extensions") diff --git a/embed.h b/embed.h index 64c1eaf..3b54154 100644 --- a/embed.h +++ b/embed.h @@ -543,6 +543,7 @@ #define ref Perl_ref #define refkids Perl_refkids #define regdump Perl_regdump +#define regclass_swash Perl_regclass_swash #define pregexec Perl_pregexec #define pregfree Perl_pregfree #define pregcomp Perl_pregcomp @@ -995,7 +996,6 @@ #define regbranch S_regbranch #define reguni S_reguni #define regclass S_regclass -#define regclassutf8 S_regclassutf8 #define regcurly S_regcurly #define reg_node S_reg_node #define regpiece S_regpiece @@ -1025,7 +1025,6 @@ #define regrepeat_hard S_regrepeat_hard #define regtry S_regtry #define reginclass S_reginclass -#define reginclassutf8 S_reginclassutf8 #define regcppush S_regcppush #define regcppop S_regcppop #define regcp_set_to S_regcp_set_to @@ -2015,6 +2014,7 @@ #define ref(a,b) Perl_ref(aTHX_ a,b) #define refkids(a,b) Perl_refkids(aTHX_ a,b) #define regdump(a) Perl_regdump(aTHX_ a) +#define regclass_swash(a,b,c) Perl_regclass_swash(aTHX_ a,b,c) #define pregexec(a,b,c,d,e,f,g) Perl_pregexec(aTHX_ a,b,c,d,e,f,g) #define pregfree(a) Perl_pregfree(aTHX_ a) #define pregcomp(a,b,c) Perl_pregcomp(aTHX_ a,b,c) @@ -2459,7 +2459,6 @@ #define regbranch(a,b,c) S_regbranch(aTHX_ a,b,c) #define reguni(a,b,c,d) S_reguni(aTHX_ a,b,c,d) #define regclass(a) S_regclass(aTHX_ a) -#define regclassutf8(a) S_regclassutf8(aTHX_ a) #define regcurly(a) S_regcurly(aTHX_ a) #define reg_node(a,b) S_reg_node(aTHX_ a,b) #define regpiece(a,b) S_regpiece(aTHX_ a,b) @@ -2487,8 +2486,7 @@ #define regrepeat(a,b) S_regrepeat(aTHX_ a,b) #define regrepeat_hard(a,b,c) S_regrepeat_hard(aTHX_ a,b,c) #define regtry(a,b) S_regtry(aTHX_ a,b) -#define reginclass(a,b) S_reginclass(aTHX_ a,b) -#define reginclassutf8(a,b) S_reginclassutf8(aTHX_ a,b) +#define reginclass(a,b,c) S_reginclass(aTHX_ a,b,c) #define regcppush(a) S_regcppush(aTHX_ a) #define regcppop() S_regcppop(aTHX) #define regcp_set_to(a) S_regcp_set_to(aTHX_ a) @@ -3950,6 +3948,8 @@ #define refkids Perl_refkids #define Perl_regdump CPerlObj::Perl_regdump #define regdump Perl_regdump +#define Perl_regclass_swash CPerlObj::Perl_regclass_swash +#define regclass_swash Perl_regclass_swash #define Perl_pregexec CPerlObj::Perl_pregexec #define pregexec Perl_pregexec #define Perl_pregfree CPerlObj::Perl_pregfree @@ -4787,8 +4787,6 @@ #define reguni S_reguni #define S_regclass CPerlObj::S_regclass #define regclass S_regclass -#define S_regclassutf8 CPerlObj::S_regclassutf8 -#define regclassutf8 S_regclassutf8 #define S_regcurly CPerlObj::S_regcurly #define regcurly S_regcurly #define S_reg_node CPerlObj::S_reg_node @@ -4845,8 +4843,6 @@ #define regtry S_regtry #define S_reginclass CPerlObj::S_reginclass #define reginclass S_reginclass -#define S_reginclassutf8 CPerlObj::S_reginclassutf8 -#define reginclassutf8 S_reginclassutf8 #define S_regcppush CPerlObj::S_regcppush #define regcppush S_regcppush #define S_regcppop CPerlObj::S_regcppop diff --git a/embed.pl b/embed.pl index 9e2bd9c..32f3ddc 100755 --- a/embed.pl +++ b/embed.pl @@ -1873,6 +1873,7 @@ Ap |void |push_scope p |OP* |ref |OP* o|I32 type p |OP* |refkids |OP* o|I32 type Ap |void |regdump |regexp* r +Ap |SV* |regclass_swash |struct regnode *n|bool doinit|SV **initsvp Ap |I32 |pregexec |regexp* prog|char* stringarg \ |char* strend|char* strbeg|I32 minend \ |SV* screamer|U32 nosave @@ -2366,7 +2367,6 @@ s |regnode*|regatom |struct RExC_state_t*|I32 * s |regnode*|regbranch |struct RExC_state_t*|I32 *|I32 s |void |reguni |struct RExC_state_t*|UV|char *|STRLEN* s |regnode*|regclass |struct RExC_state_t* -s |regnode*|regclassutf8 |struct RExC_state_t* s |I32 |regcurly |char * s |regnode*|reg_node |struct RExC_state_t*|U8 s |regnode*|regpiece |struct RExC_state_t*|I32 * @@ -2401,8 +2401,7 @@ s |I32 |regmatch |regnode *prog s |I32 |regrepeat |regnode *p|I32 max s |I32 |regrepeat_hard |regnode *p|I32 max|I32 *lp s |I32 |regtry |regexp *prog|char *startpos -s |bool |reginclass |regnode *p|I32 c -s |bool |reginclassutf8 |regnode *f|U8* p +s |bool |reginclass |regnode *n|U8 *p|bool do_utf8sv_is_utf8 s |CHECKPOINT|regcppush |I32 parenfloor s |char*|regcppop s |char*|regcp_set_to |I32 ss diff --git a/ext/DB_File/Changes b/ext/DB_File/Changes index 31c22f7..eda270d 100644 --- a/ext/DB_File/Changes +++ b/ext/DB_File/Changes @@ -323,3 +323,14 @@ * Included Perl core patch 8068 -- fix for bug 20001013.009 When run with warnings enabled "$hash{XX} = undef " produced an "Uninitialized value" warning. This has been fixed. + +1.75 17th December 2000 + + * Fixed perl core patch 7703 + + * Added suppport to allow DB_File to be built with Berkeley DB 3.2 -- + btree_compare, btree_prefix and hash_cb needed to be changed. + + * Updated dbinfo to support Berkeley DB 3.2 file format changes. + + diff --git a/ext/DB_File/DB_File.pm b/ext/DB_File/DB_File.pm index 2f3aafe..c830216 100644 --- a/ext/DB_File/DB_File.pm +++ b/ext/DB_File/DB_File.pm @@ -1,8 +1,8 @@ # DB_File.pm -- Perl 5 interface to Berkeley DB # # written by Paul Marquess (Paul.Marquess@btinternet.com) -# last modified 10th December 2000 -# version 1.74 +# last modified 17th December 2000 +# version 1.75 # # Copyright (c) 1995-2000 Paul Marquess. All rights reserved. # This program is free software; you can redistribute it and/or @@ -151,7 +151,7 @@ use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO use Carp; -$VERSION = "1.74" ; +$VERSION = "1.75" ; #typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; $DB_BTREE = new DB_File::BTREEINFO ; diff --git a/ext/DB_File/DB_File.xs b/ext/DB_File/DB_File.xs index 5ba18f3..fa3bb33 100644 --- a/ext/DB_File/DB_File.xs +++ b/ext/DB_File/DB_File.xs @@ -3,8 +3,8 @@ DB_File.xs -- Perl 5 interface to Berkeley DB written by Paul Marquess - last modified 10 December 2000 - version 1.74 + last modified 17 December 2000 + version 1.75 All comments/suggestions/problems are welcome @@ -86,6 +86,10 @@ 1.74 - A call to open needed parenthesised to stop it clashing with a win32 macro. Added Perl core patches 7703 & 7801. + 1.75 - Fixed Perl core patch 7703. + Added suppport to allow DB_File to be built with + Berkeley DB 3.2 -- btree_compare, btree_prefix and hash_cb + needed to be changed. */ @@ -166,6 +170,10 @@ extern void __getBerkeleyDBInfo(void); # define BERKELEY_DB_1_OR_2 #endif +#if DB_VERSION_MAJOR > 3 || (DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR >= 2) +# define AT_LEAST_DB_3_2 +#endif + /* map version 2 features & constants onto their version 1 equivalent */ #ifdef DB_Prefix_t @@ -251,6 +259,7 @@ typedef db_recno_t recno_t; #else /* db version 1.x */ +#define BERKELEY_DB_1 #define BERKELEY_DB_1_OR_2 typedef union INFO { @@ -480,6 +489,19 @@ u_int flags ; static int +#ifdef AT_LEAST_DB_3_2 + +#ifdef CAN_PROTOTYPE +btree_compare(DB * db, const DBT *key1, const DBT *key2) +#else +btree_compare(db, key1, key2) +DB * db ; +const DBT * key1 ; +const DBT * key2 ; +#endif /* CAN_PROTOTYPE */ + +#else /* Berkeley DB < 3.2 */ + #ifdef CAN_PROTOTYPE btree_compare(const DBT *key1, const DBT *key2) #else @@ -487,6 +509,9 @@ btree_compare(key1, key2) const DBT * key1 ; const DBT * key2 ; #endif + +#endif + { #ifdef dTHX dTHX; @@ -536,6 +561,19 @@ const DBT * key2 ; } static DB_Prefix_t +#ifdef AT_LEAST_DB_3_2 + +#ifdef CAN_PROTOTYPE +btree_prefix(DB * db, const DBT *key1, const DBT *key2) +#else +btree_prefix(db, key1, key2) +Db * db ; +const DBT * key1 ; +const DBT * key2 ; +#endif + +#else /* Berkeley DB < 3.2 */ + #ifdef CAN_PROTOTYPE btree_prefix(const DBT *key1, const DBT *key2) #else @@ -543,6 +581,8 @@ btree_prefix(key1, key2) const DBT * key1 ; const DBT * key2 ; #endif + +#endif { #ifdef dTHX dTHX; @@ -592,13 +632,26 @@ const DBT * key2 ; } -#if defined(BERKELEY_DB_1_OR_2) && !(DB_VERSION_MINOR == 7 && DB_VERSION_PATCH >= 7) +#ifdef BERKELEY_DB_1 # define HASH_CB_SIZE_TYPE size_t #else # define HASH_CB_SIZE_TYPE u_int32_t #endif static DB_Hash_t +#ifdef AT_LEAST_DB_3_2 + +#ifdef CAN_PROTOTYPE +hash_cb(DB * db, const void *data, u_int32_t size) +#else +hash_cb(db, data, size) +DB * db ; +const void * data ; +HASH_CB_SIZE_TYPE size ; +#endif + +#else /* Berkeley DB < 3.2 */ + #ifdef CAN_PROTOTYPE hash_cb(const void *data, HASH_CB_SIZE_TYPE size) #else @@ -606,6 +659,8 @@ hash_cb(data, size) const void * data ; HASH_CB_SIZE_TYPE size ; #endif + +#endif { #ifdef dTHX dTHX; diff --git a/ext/DB_File/dbinfo b/ext/DB_File/dbinfo index 240e3fc..5a4df15 100644 --- a/ext/DB_File/dbinfo +++ b/ext/DB_File/dbinfo @@ -49,7 +49,9 @@ my %Data = Type => "Queue", Versions => { - 1 => "3.0.0 or greater", + 1 => "3.0.x", + 2 => "3.1.x", + 3 => "3.2.x or greater", } }, ) ; @@ -88,7 +90,7 @@ else { die "not a Berkeley DB database file.\n" } my $type = $Data{$magic} ; -my $magic = sprintf "%06X", $magic ; +$magic = sprintf "%06X", $magic ; my $ver_string = "Unknown" ; $ver_string = $type->{Versions}{$version} diff --git a/ext/Fcntl/Fcntl.pm b/ext/Fcntl/Fcntl.pm index 92103a1..c68dda1 100644 --- a/ext/Fcntl/Fcntl.pm +++ b/ext/Fcntl/Fcntl.pm @@ -201,7 +201,7 @@ sub S_ISENFMT { ( $_[0] & _S_IFMT() ) == S_IFENFMT() } sub AUTOLOAD { (my $constname = $AUTOLOAD) =~ s/.*:://; - my $val = constant($constname, 0); + my $val = constant($constname); if ($! != 0) { if ($! =~ /Invalid/ || $!{EINVAL}) { $AutoLoader::AUTOLOAD = $AUTOLOAD; diff --git a/ext/Fcntl/Fcntl.xs b/ext/Fcntl/Fcntl.xs index b597e03..21029b2 100644 --- a/ext/Fcntl/Fcntl.xs +++ b/ext/Fcntl/Fcntl.xs @@ -40,13 +40,13 @@ not_here(char *s) return -1; } -static double -constant(char *name, int arg) +static IV +constant(char *name) { errno = 0; - switch (*name) { + switch (*(name++)) { case '_': - if (strEQ(name, "_S_IFMT")) /* Yes, on name _S_IFMT return S_IFMT. */ + if (strEQ(name, "S_IFMT")) /* Yes, on name _S_IFMT return S_IFMT. */ #ifdef S_IFMT return S_IFMT; #else @@ -54,218 +54,219 @@ constant(char *name, int arg) #endif break; case 'F': - if (strnEQ(name, "F_", 2)) { - if (strEQ(name, "F_ALLOCSP")) + if (*name == '_') { + name++; + if (strEQ(name, "ALLOCSP")) #ifdef F_ALLOCSP return F_ALLOCSP; #else goto not_there; #endif - if (strEQ(name, "F_ALLOCSP64")) + if (strEQ(name, "ALLOCSP64")) #ifdef F_ALLOCSP64 return F_ALLOCSP64; #else goto not_there; #endif - if (strEQ(name, "F_COMPAT")) + if (strEQ(name, "COMPAT")) #ifdef F_COMPAT return F_COMPAT; #else goto not_there; #endif - if (strEQ(name, "F_DUP2FD")) + if (strEQ(name, "DUP2FD")) #ifdef F_DUP2FD return F_DUP2FD; #else goto not_there; #endif - if (strEQ(name, "F_DUPFD")) + if (strEQ(name, "DUPFD")) #ifdef F_DUPFD return F_DUPFD; #else goto not_there; #endif - if (strEQ(name, "F_EXLCK")) + if (strEQ(name, "EXLCK")) #ifdef F_EXLCK return F_EXLCK; #else goto not_there; #endif - if (strEQ(name, "F_FREESP")) + if (strEQ(name, "FREESP")) #ifdef F_FREESP return F_FREESP; #else goto not_there; #endif - if (strEQ(name, "F_FREESP64")) + if (strEQ(name, "FREESP64")) #ifdef F_FREESP64 return F_FREESP64; #else goto not_there; #endif - if (strEQ(name, "F_FSYNC")) + if (strEQ(name, "FSYNC")) #ifdef F_FSYNC return F_FSYNC; #else goto not_there; #endif - if (strEQ(name, "F_FSYNC64")) + if (strEQ(name, "FSYNC64")) #ifdef F_FSYNC64 return F_FSYNC64; #else goto not_there; #endif - if (strEQ(name, "F_GETFD")) + if (strEQ(name, "GETFD")) #ifdef F_GETFD return F_GETFD; #else goto not_there; #endif - if (strEQ(name, "F_GETFL")) + if (strEQ(name, "GETFL")) #ifdef F_GETFL return F_GETFL; #else goto not_there; #endif - if (strEQ(name, "F_GETLK")) + if (strEQ(name, "GETLK")) #ifdef F_GETLK return F_GETLK; #else goto not_there; #endif - if (strEQ(name, "F_GETLK64")) + if (strEQ(name, "GETLK64")) #ifdef F_GETLK64 return F_GETLK64; #else goto not_there; #endif - if (strEQ(name, "F_GETOWN")) + if (strEQ(name, "GETOWN")) #ifdef F_GETOWN return F_GETOWN; #else goto not_there; #endif - if (strEQ(name, "F_NODNY")) + if (strEQ(name, "NODNY")) #ifdef F_NODNY return F_NODNY; #else goto not_there; #endif - if (strEQ(name, "F_POSIX")) + if (strEQ(name, "POSIX")) #ifdef F_POSIX return F_POSIX; #else goto not_there; #endif - if (strEQ(name, "F_RDACC")) + if (strEQ(name, "RDACC")) #ifdef F_RDACC return F_RDACC; #else goto not_there; #endif - if (strEQ(name, "F_RDDNY")) + if (strEQ(name, "RDDNY")) #ifdef F_RDDNY return F_RDDNY; #else goto not_there; #endif - if (strEQ(name, "F_RDLCK")) + if (strEQ(name, "RDLCK")) #ifdef F_RDLCK return F_RDLCK; #else goto not_there; #endif - if (strEQ(name, "F_RWACC")) + if (strEQ(name, "RWACC")) #ifdef F_RWACC return F_RWACC; #else goto not_there; #endif - if (strEQ(name, "F_RWDNY")) + if (strEQ(name, "RWDNY")) #ifdef F_RWDNY return F_RWDNY; #else goto not_there; #endif - if (strEQ(name, "F_SETFD")) + if (strEQ(name, "SETFD")) #ifdef F_SETFD return F_SETFD; #else goto not_there; #endif - if (strEQ(name, "F_SETFL")) + if (strEQ(name, "SETFL")) #ifdef F_SETFL return F_SETFL; #else goto not_there; #endif - if (strEQ(name, "F_SETLK")) + if (strEQ(name, "SETLK")) #ifdef F_SETLK return F_SETLK; #else goto not_there; #endif - if (strEQ(name, "F_SETLK64")) + if (strEQ(name, "SETLK64")) #ifdef F_SETLK64 return F_SETLK64; #else goto not_there; #endif - if (strEQ(name, "F_SETLKW")) + if (strEQ(name, "SETLKW")) #ifdef F_SETLKW return F_SETLKW; #else goto not_there; #endif - if (strEQ(name, "F_SETLKW64")) + if (strEQ(name, "SETLKW64")) #ifdef F_SETLKW64 return F_SETLKW64; #else goto not_there; #endif - if (strEQ(name, "F_SETOWN")) + if (strEQ(name, "SETOWN")) #ifdef F_SETOWN return F_SETOWN; #else goto not_there; #endif - if (strEQ(name, "F_SHARE")) + if (strEQ(name, "SHARE")) #ifdef F_SHARE return F_SHARE; #else goto not_there; #endif - if (strEQ(name, "F_SHLCK")) + if (strEQ(name, "SHLCK")) #ifdef F_SHLCK return F_SHLCK; #else goto not_there; #endif - if (strEQ(name, "F_UNLCK")) + if (strEQ(name, "UNLCK")) #ifdef F_UNLCK return F_UNLCK; #else goto not_there; #endif - if (strEQ(name, "F_UNSHARE")) + if (strEQ(name, "UNSHARE")) #ifdef F_UNSHARE return F_UNSHARE; #else goto not_there; #endif - if (strEQ(name, "F_WRACC")) + if (strEQ(name, "WRACC")) #ifdef F_WRACC return F_WRACC; #else goto not_there; #endif - if (strEQ(name, "F_WRDNY")) + if (strEQ(name, "WRDNY")) #ifdef F_WRDNY return F_WRDNY; #else goto not_there; #endif - if (strEQ(name, "F_WRLCK")) + if (strEQ(name, "WRLCK")) #ifdef F_WRLCK return F_WRLCK; #else @@ -274,79 +275,79 @@ constant(char *name, int arg) errno = EINVAL; return 0; } - if (strEQ(name, "FAPPEND")) + if (strEQ(name, "APPEND")) #ifdef FAPPEND return FAPPEND; #else goto not_there; #endif - if (strEQ(name, "FASYNC")) + if (strEQ(name, "ASYNC")) #ifdef FASYNC return FASYNC; #else goto not_there; #endif - if (strEQ(name, "FCREAT")) + if (strEQ(name, "CREAT")) #ifdef FCREAT return FCREAT; #else goto not_there; #endif - if (strEQ(name, "FD_CLOEXEC")) + if (strEQ(name, "D_CLOEXEC")) #ifdef FD_CLOEXEC return FD_CLOEXEC; #else goto not_there; #endif - if (strEQ(name, "FDEFER")) + if (strEQ(name, "DEFER")) #ifdef FDEFER return FDEFER; #else goto not_there; #endif - if (strEQ(name, "FDSYNC")) + if (strEQ(name, "DSYNC")) #ifdef FDSYNC return FDSYNC; #else goto not_there; #endif - if (strEQ(name, "FEXCL")) + if (strEQ(name, "EXCL")) #ifdef FEXCL return FEXCL; #else goto not_there; #endif - if (strEQ(name, "FLARGEFILE")) + if (strEQ(name, "LARGEFILE")) #ifdef FLARGEFILE return FLARGEFILE; #else goto not_there; #endif - if (strEQ(name, "FNDELAY")) + if (strEQ(name, "NDELAY")) #ifdef FNDELAY return FNDELAY; #else goto not_there; #endif - if (strEQ(name, "FNONBLOCK")) + if (strEQ(name, "NONBLOCK")) #ifdef FNONBLOCK return FNONBLOCK; #else goto not_there; #endif - if (strEQ(name, "FRSYNC")) + if (strEQ(name, "RSYNC")) #ifdef FRSYNC return FRSYNC; #else goto not_there; #endif - if (strEQ(name, "FSYNC")) + if (strEQ(name, "SYNC")) #ifdef FSYNC return FSYNC; #else goto not_there; #endif - if (strEQ(name, "FTRUNC")) + if (strEQ(name, "TRUNC")) #ifdef FTRUNC return FTRUNC; #else @@ -354,28 +355,29 @@ constant(char *name, int arg) #endif break; case 'L': - if (strnEQ(name, "LOCK_", 5)) { + if (strnEQ(name, "OCK_", 4)) { /* We support flock() on systems which don't have it, so always supply the constants. */ - if (strEQ(name, "LOCK_SH")) + name += 4; + if (strEQ(name, "SH")) #ifdef LOCK_SH return LOCK_SH; #else return 1; #endif - if (strEQ(name, "LOCK_EX")) + if (strEQ(name, "EX")) #ifdef LOCK_EX return LOCK_EX; #else return 2; #endif - if (strEQ(name, "LOCK_NB")) + if (strEQ(name, "NB")) #ifdef LOCK_NB return LOCK_NB; #else return 4; #endif - if (strEQ(name, "LOCK_UN")) + if (strEQ(name, "UN")) #ifdef LOCK_UN return LOCK_UN; #else @@ -385,188 +387,189 @@ constant(char *name, int arg) goto not_there; break; case 'O': - if (strnEQ(name, "O_", 2)) { - if (strEQ(name, "O_ACCMODE")) + if (name[0] == '_') { + name++; + if (strEQ(name, "ACCMODE")) #ifdef O_ACCMODE return O_ACCMODE; #else goto not_there; #endif - if (strEQ(name, "O_APPEND")) + if (strEQ(name, "APPEND")) #ifdef O_APPEND return O_APPEND; #else goto not_there; #endif - if (strEQ(name, "O_ASYNC")) + if (strEQ(name, "ASYNC")) #ifdef O_ASYNC return O_ASYNC; #else goto not_there; #endif - if (strEQ(name, "O_BINARY")) + if (strEQ(name, "BINARY")) #ifdef O_BINARY return O_BINARY; #else goto not_there; #endif - if (strEQ(name, "O_CREAT")) + if (strEQ(name, "CREAT")) #ifdef O_CREAT return O_CREAT; #else goto not_there; #endif - if (strEQ(name, "O_DEFER")) + if (strEQ(name, "DEFER")) #ifdef O_DEFER return O_DEFER; #else goto not_there; #endif - if (strEQ(name, "O_DIRECT")) + if (strEQ(name, "DIRECT")) #ifdef O_DIRECT return O_DIRECT; #else goto not_there; #endif - if (strEQ(name, "O_DIRECTORY")) + if (strEQ(name, "DIRECTORY")) #ifdef O_DIRECTORY return O_DIRECTORY; #else goto not_there; #endif - if (strEQ(name, "O_DSYNC")) + if (strEQ(name, "DSYNC")) #ifdef O_DSYNC return O_DSYNC; #else goto not_there; #endif - if (strEQ(name, "O_EXCL")) + if (strEQ(name, "EXCL")) #ifdef O_EXCL return O_EXCL; #else goto not_there; #endif - if (strEQ(name, "O_EXLOCK")) + if (strEQ(name, "EXLOCK")) #ifdef O_EXLOCK return O_EXLOCK; #else goto not_there; #endif - if (strEQ(name, "O_LARGEFILE")) + if (strEQ(name, "LARGEFILE")) #ifdef O_LARGEFILE return O_LARGEFILE; #else goto not_there; #endif - if (strEQ(name, "O_NDELAY")) + if (strEQ(name, "NDELAY")) #ifdef O_NDELAY return O_NDELAY; #else goto not_there; #endif - if (strEQ(name, "O_NOCTTY")) + if (strEQ(name, "NOCTTY")) #ifdef O_NOCTTY return O_NOCTTY; #else goto not_there; #endif - if (strEQ(name, "O_NOFOLLOW")) + if (strEQ(name, "NOFOLLOW")) #ifdef O_NOFOLLOW return O_NOFOLLOW; #else goto not_there; #endif - if (strEQ(name, "O_NOINHERIT")) + if (strEQ(name, "NOINHERIT")) #ifdef O_NOINHERIT return O_NOINHERIT; #else goto not_there; #endif - if (strEQ(name, "O_NONBLOCK")) + if (strEQ(name, "NONBLOCK")) #ifdef O_NONBLOCK return O_NONBLOCK; #else goto not_there; #endif - if (strEQ(name, "O_RANDOM")) + if (strEQ(name, "RANDOM")) #ifdef O_RANDOM return O_RANDOM; #else goto not_there; #endif - if (strEQ(name, "O_RAW")) + if (strEQ(name, "RAW")) #ifdef O_RAW return O_RAW; #else goto not_there; #endif - if (strEQ(name, "O_RDONLY")) + if (strEQ(name, "RDONLY")) #ifdef O_RDONLY return O_RDONLY; #else goto not_there; #endif - if (strEQ(name, "O_RDWR")) + if (strEQ(name, "RDWR")) #ifdef O_RDWR return O_RDWR; #else goto not_there; #endif - if (strEQ(name, "O_RSYNC")) + if (strEQ(name, "RSYNC")) #ifdef O_RSYNC return O_RSYNC; #else goto not_there; #endif - if (strEQ(name, "O_SEQUENTIAL")) + if (strEQ(name, "SEQUENTIAL")) #ifdef O_SEQUENTIAL return O_SEQUENTIAL; #else goto not_there; #endif - if (strEQ(name, "O_SHLOCK")) + if (strEQ(name, "SHLOCK")) #ifdef O_SHLOCK return O_SHLOCK; #else goto not_there; #endif - if (strEQ(name, "O_SYNC")) + if (strEQ(name, "SYNC")) #ifdef O_SYNC return O_SYNC; #else goto not_there; #endif - if (strEQ(name, "O_TEMPORARY")) + if (strEQ(name, "TEMPORARY")) #ifdef O_TEMPORARY return O_TEMPORARY; #else goto not_there; #endif - if (strEQ(name, "O_TEXT")) + if (strEQ(name, "TEXT")) #ifdef O_TEXT return O_TEXT; #else goto not_there; #endif - if (strEQ(name, "O_TRUNC")) + if (strEQ(name, "TRUNC")) #ifdef O_TRUNC return O_TRUNC; #else goto not_there; #endif - if (strEQ(name, "O_WRONLY")) + if (strEQ(name, "WRONLY")) #ifdef O_WRONLY return O_WRONLY; #else goto not_there; #endif - if (strEQ(name, "O_ALIAS")) + if (strEQ(name, "ALIAS")) #ifdef O_ALIAS return O_ALIAS; #else goto not_there; #endif - if (strEQ(name, "O_RSRC")) + if (strEQ(name, "RSRC")) #ifdef O_RSRC return O_RSRC; #else @@ -576,171 +579,171 @@ constant(char *name, int arg) goto not_there; break; case 'S': - switch (name[1]) { + switch (*(name++)) { case '_': - if (strEQ(name, "S_ISUID")) + if (strEQ(name, "ISUID")) #ifdef S_ISUID return S_ISUID; #else goto not_there; #endif - if (strEQ(name, "S_ISGID")) + if (strEQ(name, "ISGID")) #ifdef S_ISGID return S_ISGID; #else goto not_there; #endif - if (strEQ(name, "S_ISVTX")) + if (strEQ(name, "ISVTX")) #ifdef S_ISVTX return S_ISVTX; #else goto not_there; #endif - if (strEQ(name, "S_ISTXT")) + if (strEQ(name, "ISTXT")) #ifdef S_ISTXT return S_ISTXT; #else goto not_there; #endif - if (strEQ(name, "S_IFREG")) + if (strEQ(name, "IFREG")) #ifdef S_IFREG return S_IFREG; #else goto not_there; #endif - if (strEQ(name, "S_IFDIR")) + if (strEQ(name, "IFDIR")) #ifdef S_IFDIR return S_IFDIR; #else goto not_there; #endif - if (strEQ(name, "S_IFLNK")) + if (strEQ(name, "IFLNK")) #ifdef S_IFLNK return S_IFLNK; #else goto not_there; #endif - if (strEQ(name, "S_IFSOCK")) + if (strEQ(name, "IFSOCK")) #ifdef S_IFSOCK return S_IFSOCK; #else goto not_there; #endif - if (strEQ(name, "S_IFBLK")) + if (strEQ(name, "IFBLK")) #ifdef S_IFBLK return S_IFBLK; #else goto not_there; #endif - if (strEQ(name, "S_IFCHR")) + if (strEQ(name, "IFCHR")) #ifdef S_IFCHR return S_IFCHR; #else goto not_there; #endif - if (strEQ(name, "S_IFIFO")) + if (strEQ(name, "IFIFO")) #ifdef S_IFIFO return S_IFIFO; #else goto not_there; #endif - if (strEQ(name, "S_IFWHT")) + if (strEQ(name, "IFWHT")) #ifdef S_IFWHT return S_IFWHT; #else goto not_there; #endif - if (strEQ(name, "S_ENFMT")) + if (strEQ(name, "ENFMT")) #ifdef S_ENFMT return S_ENFMT; #else goto not_there; #endif - if (strEQ(name, "S_IRUSR")) + if (strEQ(name, "IRUSR")) #ifdef S_IRUSR return S_IRUSR; #else goto not_there; #endif - if (strEQ(name, "S_IWUSR")) + if (strEQ(name, "IWUSR")) #ifdef S_IWUSR return S_IWUSR; #else goto not_there; #endif - if (strEQ(name, "S_IXUSR")) + if (strEQ(name, "IXUSR")) #ifdef S_IXUSR return S_IXUSR; #else goto not_there; #endif - if (strEQ(name, "S_IRWXU")) + if (strEQ(name, "IRWXU")) #ifdef S_IRWXU return S_IRWXU; #else goto not_there; #endif - if (strEQ(name, "S_IRGRP")) + if (strEQ(name, "IRGRP")) #ifdef S_IRGRP return S_IRGRP; #else goto not_there; #endif - if (strEQ(name, "S_IWGRP")) + if (strEQ(name, "IWGRP")) #ifdef S_IWGRP return S_IWGRP; #else goto not_there; #endif - if (strEQ(name, "S_IXGRP")) + if (strEQ(name, "IXGRP")) #ifdef S_IXGRP return S_IXGRP; #else goto not_there; #endif - if (strEQ(name, "S_IRWXG")) + if (strEQ(name, "IRWXG")) #ifdef S_IRWXG return S_IRWXG; #else goto not_there; #endif - if (strEQ(name, "S_IROTH")) + if (strEQ(name, "IROTH")) #ifdef S_IROTH return S_IROTH; #else goto not_there; #endif - if (strEQ(name, "S_IWOTH")) + if (strEQ(name, "IWOTH")) #ifdef S_IWOTH return S_IWOTH; #else goto not_there; #endif - if (strEQ(name, "S_IXOTH")) + if (strEQ(name, "IXOTH")) #ifdef S_IXOTH return S_IXOTH; #else goto not_there; #endif - if (strEQ(name, "S_IRWXO")) + if (strEQ(name, "IRWXO")) #ifdef S_IRWXO return S_IRWXO; #else goto not_there; #endif - if (strEQ(name, "S_IREAD")) + if (strEQ(name, "IREAD")) #ifdef S_IREAD return S_IREAD; #else goto not_there; #endif - if (strEQ(name, "S_IWRITE")) + if (strEQ(name, "IWRITE")) #ifdef S_IWRITE return S_IWRITE; #else goto not_there; #endif - if (strEQ(name, "S_IEXEC")) + if (strEQ(name, "IEXEC")) #ifdef S_IEXEC return S_IEXEC; #else @@ -748,19 +751,19 @@ constant(char *name, int arg) #endif break; case 'E': - if (strEQ(name, "SEEK_CUR")) + if (strEQ(name, "EK_CUR")) #ifdef SEEK_CUR return SEEK_CUR; #else return 1; #endif - if (strEQ(name, "SEEK_END")) + if (strEQ(name, "EK_END")) #ifdef SEEK_END return SEEK_END; #else return 2; #endif - if (strEQ(name, "SEEK_SET")) + if (strEQ(name, "EK_SET")) #ifdef SEEK_SET return SEEK_SET; #else @@ -780,8 +783,7 @@ not_there: MODULE = Fcntl PACKAGE = Fcntl -double -constant(name,arg) +IV +constant(name) char * name - int arg diff --git a/ext/Sys/Syslog/Syslog.pm b/ext/Sys/Syslog/Syslog.pm index 92b82a1..71f5b82 100644 --- a/ext/Sys/Syslog/Syslog.pm +++ b/ext/Sys/Syslog/Syslog.pm @@ -264,7 +264,9 @@ sub xlate { $name = uc $name; $name = "LOG_$name" unless $name =~ /^LOG_/; $name = "Sys::Syslog::$name"; - eval { &$name } || -1; + # Can't have just eval { &$name } || -1 because some LOG_XXX may be zero. + my $value = eval { &$name }; + defined $value ? $value : -1; } sub connect { diff --git a/global.sym b/global.sym index 7ca196b..2f6f65b 100644 --- a/global.sym +++ b/global.sym @@ -21,6 +21,7 @@ Perl_get_context Perl_set_context Perl_amagic_call Perl_Gv_AMupdate +Perl_gv_handler Perl_apply_attrs_string Perl_avhv_delete_ent Perl_avhv_exists_ent @@ -315,6 +316,7 @@ Perl_pmflag Perl_pop_scope Perl_push_scope Perl_regdump +Perl_regclass_swash Perl_pregexec Perl_pregfree Perl_pregcomp diff --git a/lib/Net/Ping.pm b/lib/Net/Ping.pm index 20a642e..a2846fe 100644 --- a/lib/Net/Ping.pm +++ b/lib/Net/Ping.pm @@ -461,7 +461,7 @@ same data as the packet that was sent, the remote host is considered reachable. This protocol does not require any special privileges. It should be borne in mind that, for both tcp and udp ping, a host -will be reported as unreachable if if not is not running the +will be reported as unreachable if it is not running the appropriate echo service. For Unix-like systems see L for more information. diff --git a/mg.c b/mg.c index f97c6ce..0ac0742 100644 --- a/mg.c +++ b/mg.c @@ -391,7 +391,7 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) case '5': case '6': case '7': case '8': case '9': case '&': if (PL_curpm && (rx = PL_curpm->op_pmregexp)) { - paren = atoi(mg->mg_ptr); + paren = atoi(mg->mg_ptr); /* $& is in [0] */ getparen: if (paren <= rx->nparens && (s1 = rx->startp[paren]) != -1 && @@ -399,17 +399,15 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) { i = t1 - s1; getlen: - if (i > 0 && (PL_curpm->op_pmdynflags & PMdf_UTF8) && !IN_BYTE) { - char *s = rx->subbeg + s1; + if (i > 0 && DO_UTF8(PL_reg_sv)) { + char *s = rx->subbeg + s1; char *send = rx->subbeg + t1; - i = 0; - while (s < send) { - s += UTF8SKIP(s); - i++; - } + + i = Perl_utf8_length(aTHX_ (U8*)s, (U8*)send); } - if (i >= 0) - return i; + if (i < 0) + Perl_croak(aTHX_ "panic: magic_len: %d", i); + return i; } } return 0; @@ -604,7 +602,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) * Pre-threads, this was paren = atoi(GvENAME((GV*)mg->mg_obj)); * XXX Does the new way break anything? */ - paren = atoi(mg->mg_ptr); + paren = atoi(mg->mg_ptr); /* $& is in [0] */ getparen: if (paren <= rx->nparens && (s1 = rx->startp[paren]) != -1 && @@ -623,7 +621,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) PL_tainted = FALSE; } sv_setpvn(sv, s, i); - if ((PL_curpm->op_pmdynflags & PMdf_UTF8) && !IN_BYTE) + if (DO_UTF8(PL_reg_sv)) SvUTF8_on(sv); else SvUTF8_off(sv); diff --git a/objXSUB.h b/objXSUB.h index 43537d3..60c6e90 100644 --- a/objXSUB.h +++ b/objXSUB.h @@ -1263,6 +1263,10 @@ #define Perl_regdump pPerl->Perl_regdump #undef regdump #define regdump Perl_regdump +#undef Perl_regclass_swash +#define Perl_regclass_swash pPerl->Perl_regclass_swash +#undef regclass_swash +#define regclass_swash Perl_regclass_swash #undef Perl_pregexec #define Perl_pregexec pPerl->Perl_pregexec #undef pregexec diff --git a/op.c b/op.c index e6f7804..e40d334 100644 --- a/op.c +++ b/op.c @@ -1118,6 +1118,12 @@ Perl_scalarvoid(pTHX_ OP *o) if (SvNIOK(sv) && (SvNV(sv) == 0.0 || SvNV(sv) == 1.0)) useless = 0; else if (SvPOK(sv)) { + /* perl4's way of mixing documentation and code + (before the invention of POD) was based on a + trick to mix nroff and perl code. The trick was + built upon these three nroff macros being used in + void context. The pink camel has the details in + the script wrapman near page 319. */ if (strnEQ(SvPVX(sv), "di", 2) || strnEQ(SvPVX(sv), "ds", 2) || strnEQ(SvPVX(sv), "ig", 2)) diff --git a/os2/OS2/ExtAttr/Makefile.PL b/os2/OS2/ExtAttr/Makefile.PL index 3568028..0b8837f 100644 --- a/os2/OS2/ExtAttr/Makefile.PL +++ b/os2/OS2/ExtAttr/Makefile.PL @@ -4,7 +4,7 @@ use ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'OS2::ExtAttr', 'VERSION_FROM' => 'ExtAttr.pm', # finds $VERSION - MAN3PODS => ' ', # Pods will be built by installman. + MAN3PODS => {}, # Pods will be built by installman. 'LIBS' => [''], # e.g., '-lm' 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING' 'INC' => '', # e.g., '-I/usr/include/other' diff --git a/os2/OS2/PrfDB/Makefile.PL b/os2/OS2/PrfDB/Makefile.PL index 3952168..2d4a6a7 100644 --- a/os2/OS2/PrfDB/Makefile.PL +++ b/os2/OS2/PrfDB/Makefile.PL @@ -4,7 +4,7 @@ use ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'OS2::PrfDB', 'VERSION_FROM' => 'PrfDB.pm', # finds $VERSION - MAN3PODS => ' ', # Pods will be built by installman. + MAN3PODS => {}, # Pods will be built by installman. 'LIBS' => [''], # e.g., '-lm' 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING' 'INC' => '', # e.g., '-I/usr/include/other' diff --git a/os2/OS2/Process/Makefile.PL b/os2/OS2/Process/Makefile.PL index d324063..9c97ad0 100644 --- a/os2/OS2/Process/Makefile.PL +++ b/os2/OS2/Process/Makefile.PL @@ -4,7 +4,7 @@ use ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'OS2::Process', VERSION_FROM=> 'Process.pm', - MAN3PODS => ' ', # Pods will be built by installman. + MAN3PODS => {}, # Pods will be built by installman. 'LIBS' => [''], # e.g., '-lm' 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING' 'INC' => '', # e.g., '-I/usr/include/other' diff --git a/os2/OS2/REXX/DLL/Makefile.PL b/os2/OS2/REXX/DLL/Makefile.PL index fe2403d..fb91688 100644 --- a/os2/OS2/REXX/DLL/Makefile.PL +++ b/os2/OS2/REXX/DLL/Makefile.PL @@ -3,7 +3,7 @@ use ExtUtils::MakeMaker; WriteMakefile( NAME => 'OS2::DLL', VERSION => '0.01', - MAN3PODS => ' ', # Pods will be built by installman. + MAN3PODS => {}, # Pods will be built by installman. XSPROTOARG => '-noprototypes', PERL_MALLOC_OK => 1, ); diff --git a/os2/OS2/REXX/Makefile.PL b/os2/OS2/REXX/Makefile.PL index 6648b2c..178ef7b 100644 --- a/os2/OS2/REXX/Makefile.PL +++ b/os2/OS2/REXX/Makefile.PL @@ -3,7 +3,7 @@ use ExtUtils::MakeMaker; WriteMakefile( NAME => 'OS2::REXX', VERSION => '0.22', - MAN3PODS => ' ', # Pods will be built by installman. + MAN3PODS => {}, # Pods will be built by installman. XSPROTOARG => '-noprototypes', PERL_MALLOC_OK => 1, ); diff --git a/patchlevel.h b/patchlevel.h index 46918d1..d0d21ff 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -70,7 +70,7 @@ #if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT) static char *local_patches[] = { NULL - ,"DEVEL8132" + ,"DEVEL8199" ,NULL }; diff --git a/perlapi.c b/perlapi.c index dc6228f..bb32970 100644 --- a/perlapi.c +++ b/perlapi.c @@ -85,6 +85,13 @@ Perl_Gv_AMupdate(pTHXo_ HV* stash) return ((CPerlObj*)pPerl)->Perl_Gv_AMupdate(stash); } +#undef Perl_gv_handler +CV* +Perl_gv_handler(pTHXo_ HV* stash, I32 id) +{ + return ((CPerlObj*)pPerl)->Perl_gv_handler(stash, id); +} + #undef Perl_apply_attrs_string void Perl_apply_attrs_string(pTHXo_ char *stashpv, CV *cv, char *attrstr, STRLEN len) @@ -2312,6 +2319,13 @@ Perl_regdump(pTHXo_ regexp* r) ((CPerlObj*)pPerl)->Perl_regdump(r); } +#undef Perl_regclass_swash +SV* +Perl_regclass_swash(pTHXo_ struct regnode *n, bool doinit, SV **initsvp) +{ + return ((CPerlObj*)pPerl)->Perl_regclass_swash(n, doinit, initsvp); +} + #undef Perl_pregexec I32 Perl_pregexec(pTHXo_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave) @@ -4096,6 +4110,8 @@ Perl_sys_intern_init(pTHXo) #if defined(PERL_IN_SV_C) || defined(PERL_DECL_PROT) # if defined(DEBUGGING) # endif +# if !defined(NV_PRESERVES_UV) +# endif #endif #if defined(PERL_IN_TOKE_C) || defined(PERL_DECL_PROT) #if 0 diff --git a/perlio.h b/perlio.h index b2e5179..0c6b26a 100644 --- a/perlio.h +++ b/perlio.h @@ -307,7 +307,7 @@ extern int PerlIO_setpos (PerlIO *,SV *); #ifndef PerlIO_fdupopen extern PerlIO * PerlIO_fdupopen (pTHX_ PerlIO *); #endif -#ifndef PerlIO_modestr +#if !defined(PerlIO_modestr) && !defined(PERLIO_IS_STDIO) extern char *PerlIO_modestr (PerlIO *,char *buf); #endif #ifndef PerlIO_isutf8 @@ -320,10 +320,14 @@ extern int PerlIO_apply_layers (pTHX_ PerlIO *f, const char *mode, const char *n extern int PerlIO_binmode (pTHX_ PerlIO *f, int iotype, int omode, const char *names); #endif +#ifndef PERLIO_IS_STDIO + extern void PerlIO_cleanup(); extern void PerlIO_debug(const char *fmt,...); +#endif + END_EXTERN_C #endif /* _PERLIO_H */ diff --git a/perliol.h b/perliol.h index 429ddab..04c7071 100644 --- a/perliol.h +++ b/perliol.h @@ -78,7 +78,8 @@ extern PerlIO_funcs PerlIO_unix; extern PerlIO_funcs PerlIO_perlio; extern PerlIO_funcs PerlIO_stdio; extern PerlIO_funcs PerlIO_crlf; -extern PerlIO_funcs PerlIO_pending; +/* The EXT is need for Cygwin -- but why only for _pending? --jhi */ +EXT PerlIO_funcs PerlIO_pending; #ifdef HAS_MMAP extern PerlIO_funcs PerlIO_mmap; #endif diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 8041f68..f7ad2d3 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -298,7 +298,7 @@ L. SV* cv_const_sv(CV* cv) =for hackers -Found in file opmini.c +Found in file op.c =item dMARK @@ -1045,7 +1045,8 @@ Found in file scope.h =item looks_like_number Test if an the content of an SV looks like a number (or is a -number). +number). C and C are treated as numbers (so will not +issue a non-numeric warning), even if your atof() doesn't grok them. I32 looks_like_number(SV* sv) @@ -1178,7 +1179,7 @@ eligible for inlining at compile-time. CV* newCONSTSUB(HV* stash, char* name, SV* sv) =for hackers -Found in file opmini.c +Found in file op.c =item newHV @@ -1324,7 +1325,7 @@ Found in file sv.c Used by C to hook up XSUBs as Perl subs. =for hackers -Found in file opmini.c +Found in file op.c =item newXSproto @@ -2434,6 +2435,15 @@ Type flag for blessed scalars. See C. =for hackers Found in file sv.h +=item SvUOK + +Returns a boolean indicating whether the SV contains an unsigned integer. + + void SvUOK(SV* sv) + +=for hackers +Found in file sv.h + =item SvUPGRADE Used to upgrade an SV to a more complex form. Uses C to diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 9baf175..a27dde7 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -402,6 +402,11 @@ L for more on portability concerns. (W closed) You tried to do a bind on a closed socket. Did you forget to check the return value of your socket() call? See L. +=item binmode() on closed filehandle %s + +(W unopened) You tried binmode() on a filehandle that was never opened. +Check you control flow and number of arguments. + =item Bit vector size > 32 non-portable (W portable) Using bit vector sizes larger than 32 is non-portable. @@ -1387,7 +1392,7 @@ name. =item flock() on closed filehandle %s (W closed) The filehandle you're attempting to flock() got itself closed -some time before now. Check your logic flow. flock() operates on +some time before now. Check your control flow. flock() operates on filehandles. Are you attempting to call flock() on a dirhandle by the same name? @@ -1720,6 +1725,11 @@ silently ignored. (F) Your machine apparently doesn't implement ioctl(), which is pretty strange for a machine that supports C. +=item ioctl() on unopened %s + +(W unopened) You tried ioctl() on a filehandle that was never opened. +Check you control flow and number of arguments. + =item `%s' is not a code reference (W) The second (fourth, sixth, ...) argument of overload::constant needs @@ -2277,9 +2287,9 @@ the buffer and zero pad the new area. =item -%s on unopened filehandle %s (W unopened) You tried to invoke a file test operator on a filehandle -that isn't open. Check your logic. See also L. +that isn't open. Check your control flow. See also L. -=item %s() on unopened %s %s +=item %s() on unopened %s (W unopened) An I/O operation was attempted on a filehandle that was never initialized. You need to do an open(), a sysopen(), or a socket() @@ -2734,12 +2744,12 @@ See Server error. =item printf() on closed filehandle %s (W closed) The filehandle you're writing to got itself closed sometime -before now. Check your logic flow. +before now. Check your control flow. =item print() on closed filehandle %s (W closed) The filehandle you're printing on got itself closed sometime -before now. Check your logic flow. +before now. Check your control flow. =item Process terminated by SIG%s @@ -2778,7 +2788,7 @@ by prepending "0" to your numbers. =item readline() on closed filehandle %s (W closed) The filehandle you're reading from got itself closed sometime -before now. Check your logic flow. +before now. Check your control flow. =item Reallocation too large: %lx @@ -2943,7 +2953,7 @@ scalar that had previously been marked as free. =item send() on closed socket %s (W closed) The socket you're sending to got itself closed sometime -before now. Check your logic flow. +before now. Check your control flow. =item Sequence (? incomplete before << HERE mark in regex m/%s/ @@ -3218,7 +3228,7 @@ unconfigured. Consult your system support. =item syswrite() on closed filehandle %s (W closed) The filehandle you're writing to got itself closed sometime -before now. Check your logic flow. +before now. Check your control flow. =item Target of goto is too deeply nested @@ -3852,7 +3862,7 @@ So put in parentheses to say what you really mean. =item write() on closed filehandle %s (W closed) The filehandle you're writing to got itself closed sometime -before now. Check your logic flow. +before now. Check your control flow. =item X outside of string diff --git a/pod/perlfaq3.pod b/pod/perlfaq3.pod index 1d06c2d..5e15014 100644 --- a/pod/perlfaq3.pod +++ b/pod/perlfaq3.pod @@ -73,10 +73,11 @@ why what it's doing isn't what it should be doing. =head2 How do I profile my Perl programs? -You should get the Devel::DProf module from CPAN and also use -Benchmark.pm from the standard distribution. Benchmark lets you time -specific portions of your code, while Devel::DProf gives detailed -breakdowns of where your code spends its time. +You should get the Devel::DProf module from the standard distribution +(or separately on CPAN) and also use Benchmark.pm from the standard +distribution. The Benchmark module lets you time specific portions of +your code, while Devel::DProf gives detailed breakdowns of where your +code spends its time. Here's a sample use of Benchmark: @@ -180,12 +181,40 @@ your hard-earned cash for. PerlBuilder (http://www.solutionsoft.com/perl.htm) is an integrated development environment for Windows that supports Perl development. +VisualPerl (http://www.activestate.com/IDE) is also an integrated +development environment for Windows, Unix, and several Open Source OSes +that supports Perl development. Perl code magic is another IDE +(http://www.petes-place.com/codemagic.html). CodeMagicCD +(http://www.codemagiccd.com/) is a commercial IDE. + Perl programs are just plain text, though, so you could download emacs for Windows (http://www.gnu.org/software/emacs/windows/ntemacs.html) -or a vi clone (vim) which runs on for win32 -(http://www.cs.vu.nl/%7Etmgil/vi.html). If you're transferring -Windows files to Unix be sure to transfer them in ASCII mode so the ends -of lines are appropriately mangled. +or a vi clone such as nvi (available from CPAN in src/misc/) or vim +(http://www.vim.org/). Vim runs on win32 +(http://www.cs.vu.nl/%7Etmgil/vi.html). Vile is another widely ported +vi clone that has a Perl language sensitivity module +(http://www.clark.net/pub/dickey/vile/vile.html). SlickEdit +(http://www.slickedit.com/) is a full featured commercial editor that +has a modular architecture: it can emulate several other common +editors and it can help with programming language sensitivity modules +for a variety of programming languages including Perl. If you're +transferring Windows text files to Unix be sure to transfer them in +ASCII mode so the ends of lines are appropriately mangled. There is +also a toyedit Text widget based editor written in Perl that is +distributed with the Tk module on CPAN. The ptkdb +(http://world.std.com/~aep/ptkdb/) is a Perl/tk based debugger that +acts as a development environment of sorts. Perl Composer +(http://perlcomposer.sourceforge.net/vperl.html) is an IDE for Perl/Tk +GUI creation. + +On Mac OS the MacPerl Application comes with a simple 32k text editor +that behaves like a rudimentary IDE. In contrast to the MacPerl Application +the MPW Perl tool can make use of the MPW Shell itself as an editor (with +no 32k limit). BBEdit and BBEdit Lite are text editors for Mac OS +that have a Perl sensitivity mode (http://web.barebones.com/). +Alpha is an editor, written and extensible in Tcl, that nonetheless has +built in support for several popular markup and programming languages +including Perl and HTML (http://alpha.olm.net/). =head2 Where can I get Perl macros for vi? diff --git a/pod/perlintern.pod b/pod/perlintern.pod index 11d9385..b63b694 100644 --- a/pod/perlintern.pod +++ b/pod/perlintern.pod @@ -12,6 +12,18 @@ B! =over 8 +=item djSP + +Declare Just C. This is actually identical to C, and declares +a local copy of perl's stack pointer, available via the C macro. +See C. (Available for backward source code compatibility with the +old (Perl 5.005) thread model.) + + djSP; + +=for hackers +Found in file pp.h + =item is_gv_magical Returns C if given the name of a magical GV. @@ -27,6 +39,18 @@ allow selecting particular classes of magical variable. =for hackers Found in file gv.c +=item start_glob + +Function called by C to spawn a glob (or do the glob inside +perl on VMS). This code used to be inline, but now perl uses C +this glob starter is only used by miniperl during the build proccess. +Moving it away shrinks pp_hot.c; shrinking pp_hot.c helps speed perl up. + + PerlIO* start_glob(SV* pattern, IO *io) + +=for hackers +Found in file doio.c + =back =head1 AUTHORS diff --git a/pod/perlmodlib.pod b/pod/perlmodlib.pod index c5afea2..1810e00 100644 --- a/pod/perlmodlib.pod +++ b/pod/perlmodlib.pod @@ -114,6 +114,10 @@ Restrict unsafe operations when compiling Package for overloading perl operations +=item perlio + +Configure C level IO + =item re Alter regular expression behaviour @@ -288,10 +292,6 @@ Wrapper around CPAN.pm without using any XS module Warn of errors (from perspective of caller) -=item Carp::Heavy - -Carp guts - =item Class::Struct Declare struct-like datatypes as Perl classes @@ -444,6 +444,10 @@ Create or remove directory trees Portably perform operations on file names +=item File::Spec::Epoc + +Methods for Epoc file specs + =item File::Spec::Functions Portably perform operations on file names @@ -484,6 +488,10 @@ Keep more files open than the system permits Supply object methods for filehandles +=item Filter::Simple + +Simplified source filtering + =item FindBin Locate directory of original perl script @@ -791,7 +799,7 @@ Most importantly, CPAN includes around a thousand unbundled modules, some of which require a C compiler to build. Major categories of modules are: -=over 4 +=over =item * Language Extensions and Documentation Tools @@ -861,7 +869,7 @@ Miscellaneous Modules Registered CPAN sites as of this writing include the following. You should try to choose one close to you: -=over 4 +=over =item Africa @@ -1217,6 +1225,12 @@ If adding a new module to a set, follow the original author's standards for naming modules and the interface to methods in those modules. +If developing modules for private internal or project specific use, +that will never be released to the public, then you should ensure +that their names will not clash with any future public module. You +can do this either by using the reserved Local::* category or by +using a category name that includes an underscore like Foo_Corp::*. + To be portable each component of a module name should be limited to 11 characters. If it might be used on MS-DOS then try to ensure each is unique in the first 8 characters. Nested modules make this easier. diff --git a/pod/perltoc.pod b/pod/perltoc.pod index b34ecd6..569f4eb 100644 --- a/pod/perltoc.pod +++ b/pod/perltoc.pod @@ -62,8 +62,8 @@ compare with other languages like Java, Python, REXX, Scheme, or Tcl?, Can I do [task] in Perl?, When shouldn't I program in Perl?, What's the difference between "perl" and "Perl"?, Is it a Perl program or a Perl script?, What is a JAPH?, Where can I get a list of Larry Wall witticisms?, -How can I convince my sysadmin/supervisor/employees to use (version -5/5.005/Perl) instead of some other language?, L: Obtaining and +How can I convince my sysadmin/supervisor/employees to use version +5/5.005/Perl instead of some other language?, L: Obtaining and Learning about Perl, What machines support Perl? Where do I get it?, How can I get a binary version of Perl?, I don't have a C compiler on my system. How can I compile perl?, I copied the Perl binary from one machine @@ -72,80 +72,81 @@ compile but gdbm/dynamic loading/malloc/linking/... failed. How do I make it work?, What modules and extensions are available for Perl? What is CPAN? What does CPAN/src/... mean?, Is there an ISO or ANSI certified version of Perl?, Where can I get information on Perl?, What are the Perl -newsgroups on USENET? Where do I post questions?, Where should I post +newsgroups on Usenet? Where do I post questions?, Where should I post source code?, Perl Books, Perl in Magazines, Perl on the Net: FTP and WWW -Access, What mailing lists are there for perl?, Archives of +Access, What mailing lists are there for Perl?, Archives of comp.lang.perl.misc, Where can I buy a commercial version of Perl?, Where -do I send bug reports?, What is perl.com?, L: Programming Tools, -How do I do (anything)?, How can I use Perl interactively?, Is there a Perl -shell?, How do I debug my Perl programs?, How do I profile my Perl -programs?, How do I cross-reference my Perl programs?, Is there a -pretty-printer (formatter) for Perl?, Is there a ctags for Perl?, Is there -an IDE or Windows Perl Editor?, Where can I get Perl macros for vi?, Where -can I get perl-mode for emacs?, How can I use curses with Perl?, How can I -use X or Tk with Perl?, How can I generate simple menus without using CGI -or Tk?, What is undump?, How can I make my Perl program run faster?, How -can I make my Perl program take less memory?, Is it unsafe to return a -pointer to local data?, How can I free an array or hash so my program -shrinks?, How can I make my CGI script more efficient?, How can I hide the -source for my Perl program?, How can I compile my Perl program into byte -code or C?, How can I compile Perl into Java?, How can I get C<#!perl> to -work on [MS-DOS,NT,...]?, Can I write useful perl programs on the command -line?, Why don't perl one-liners work on my DOS/Mac/VMS system?, Where can -I learn about CGI or Web programming in Perl?, Where can I learn about -object-oriented Perl programming?, Where can I learn about linking C with -Perl? [h2xs, xsubpp], I've read perlembed, perlguts, etc., but I can't -embed perl in my C program; what am I doing wrong?, When I tried to run my -script, I got this message. What does it mean?, What's MakeMaker?, -L: Data Manipulation, Why am I getting long decimals (eg, -19.9499999999999) instead of the numbers I should be getting (eg, 19.95)?, -Why isn't my octal data interpreted correctly?, Does Perl have a round() -function? What about ceil() and floor()? Trig functions?, How do I -convert bits into ints?, Why doesn't & work the way I want it to?, How do I -multiply matrices?, How do I perform an operation on a series of integers?, -How can I output Roman numerals?, Why aren't my random numbers random?, How -do I find the week-of-the-year/day-of-the-year?, How do I find the current -century or millennium?, How can I compare two dates and find the -difference?, How can I take a string and turn it into epoch seconds?, How -can I find the Julian Day?, How do I find yesterday's date?, Does Perl have -a year 2000 problem? Is Perl Y2K compliant?, How do I validate input?, How -do I unescape a string?, How do I remove consecutive pairs of characters?, -How do I expand function calls in a string?, How do I find matching/nesting -anything?, How do I reverse a string?, How do I expand tabs in a string?, -How do I reformat a paragraph?, How can I access/change the first N letters -of a string?, How do I change the Nth occurrence of something?, How can I -count the number of occurrences of a substring within a string?, How do I -capitalize all the words on one line?, How can I split a [character] -delimited string except when inside [character]? (Comma-separated files), -How do I strip blank space from the beginning/end of a string?, How do I -pad a string with blanks or pad a number with zeroes?, How do I extract -selected columns from a string?, How do I find the soundex value of a -string?, How can I expand variables in text strings?, What's wrong with -always quoting "$vars"?, Why don't my <: Programming Tools, How do I do (anything)?, How can I use Perl +interactively?, Is there a Perl shell?, How do I debug my Perl programs?, +How do I profile my Perl programs?, How do I cross-reference my Perl +programs?, Is there a pretty-printer (formatter) for Perl?, Is there a +ctags for Perl?, Is there an IDE or Windows Perl Editor?, Where can I get +Perl macros for vi?, Where can I get perl-mode for emacs?, How can I use +curses with Perl?, How can I use X or Tk with Perl?, How can I generate +simple menus without using CGI or Tk?, What is undump?, How can I make my +Perl program run faster?, How can I make my Perl program take less memory?, +Is it unsafe to return a pointer to local data?, How can I free an array or +hash so my program shrinks?, How can I make my CGI script more efficient?, +How can I hide the source for my Perl program?, How can I compile my Perl +program into byte code or C?, How can I compile Perl into Java?, How can I +get C<#!perl> to work on [MS-DOS,NT,...]?, Can I write useful Perl programs +on the command line?, Why don't Perl one-liners work on my DOS/Mac/VMS +system?, Where can I learn about CGI or Web programming in Perl?, Where can +I learn about object-oriented Perl programming?, Where can I learn about +linking C with Perl? [h2xs, xsubpp], I've read perlembed, perlguts, etc., +but I can't embed perl in my C program; what am I doing wrong?, When I +tried to run my script, I got this message. What does it mean?, What's +MakeMaker?, L: Data Manipulation, Why am I getting long decimals +(eg, 19.9499999999999) instead of the numbers I should be getting (eg, +19.95)?, Why isn't my octal data interpreted correctly?, Does Perl have a +round() function? What about ceil() and floor()? Trig functions?, How do +I convert bits into ints?, Why doesn't & work the way I want it to?, How do +I multiply matrices?, How do I perform an operation on a series of +integers?, How can I output Roman numerals?, Why aren't my random numbers +random?, How do I find the week-of-the-year/day-of-the-year?, How do I find +the current century or millennium?, How can I compare two dates and find +the difference?, How can I take a string and turn it into epoch seconds?, +How can I find the Julian Day?, How do I find yesterday's date?, Does Perl +have a Year 2000 problem? Is Perl Y2K compliant?, How do I validate +input?, How do I unescape a string?, How do I remove consecutive pairs of +characters?, How do I expand function calls in a string?, How do I find +matching/nesting anything?, How do I reverse a string?, How do I expand +tabs in a string?, How do I reformat a paragraph?, How can I access/change +the first N letters of a string?, How do I change the Nth occurrence of +something?, How can I count the number of occurrences of a substring within +a string?, How do I capitalize all the words on one line?, How can I split +a [character] delimited string except when inside [character]? +(Comma-separated files), How do I strip blank space from the beginning/end +of a string?, How do I pad a string with blanks or pad a number with +zeroes?, How do I extract selected columns from a string?, How do I find +the soundex value of a string?, How can I expand variables in text +strings?, What's wrong with always quoting "$vars"?, Why don't my <: -Networking, My CGI script runs from the command line but not the browser. -(500 Server Error), How can I get better error messages from a CGI -program?, How do I remove HTML from a string?, How do I extract URLs?, How -do I download a file from the user's machine? How do I open a file on -another machine?, How do I make a pop-up menu in HTML?, How do I fetch an -HTML file?, How do I automate an HTML form submission?, How do I decode or -create those %-encodings on the web?, How do I redirect to another page?, -How do I put a password on my web pages?, How do I edit my .htpasswd and -.htgroup files with Perl?, How do I make sure users can't enter values into -a form that cause my CGI script to do bad things?, How do I parse a mail -header?, How do I decode a CGI form?, How do I check a valid mail address?, -How do I decode a MIME/BASE64 string?, How do I return the user's mail -address?, How do I send mail?, How do I read mail?, How do I find out my -hostname/domainname/IP address?, How do I fetch a news article or the -active newsgroups?, How do I fetch/put an FTP file?, How can I do RPC in -Perl? +daemon process?, How do I find out if I'm running interactively or not?, +How do I timeout a slow event?, How do I set CPU limits?, How do I avoid +zombies on a Unix system?, How do I use an SQL database?, How do I make a +system() exit on control-C?, How do I open a file without blocking?, How do +I install a module from CPAN?, What's the difference between require and +use?, How do I keep my own module/library directory?, How do I add the +directory my program lives in to the module/library search path?, How do I +add a directory to my include path at runtime?, What is socket.ph and where +do I get it?, L: Networking, My CGI script runs from the command +line but not the browser. (500 Server Error), How can I get better error +messages from a CGI program?, How do I remove HTML from a string?, How do I +extract URLs?, How do I download a file from the user's machine? How do I +open a file on another machine?, How do I make a pop-up menu in HTML?, How +do I fetch an HTML file?, How do I automate an HTML form submission?, How +do I decode or create those %-encodings on the web?, How do I redirect to +another page?, How do I put a password on my web pages?, How do I edit my +.htpasswd and .htgroup files with Perl?, How do I make sure users can't +enter values into a form that cause my CGI script to do bad things?, How do +I parse a mail header?, How do I decode a CGI form?, How do I check a valid +mail address?, How do I decode a MIME/BASE64 string?, How do I return the +user's mail address?, How do I send mail?, How do I read mail?, How do I +find out my hostname/domainname/IP address?, How do I fetch a news article +or the active newsgroups?, How do I fetch/put an FTP file?, How can I do +RPC in Perl? =over 4 @@ -1786,7 +1786,10 @@ DESTROY this =item Tying Arrays TIEARRAY classname, LIST, FETCH this, index, STORE this, index, value, -UNTIE this, DESTROY this +FETCHSIZE this, STORESIZE this, count, EXTEND this, count, EXISTS this, +key, DELETE this, key, CLEAR this, PUSH this, LIST, POP this, SHIFT this, +UNSHIFT this, LIST, SPLICE this, offset, length, LIST, UNTIE this, DESTROY +this =item Tying Hashes @@ -2503,7 +2506,7 @@ chcp, dataset access, OS/390 iconv, locales attributes, attrs, autouse, base, blib, bytes, charnames, constant, diagnostics, fields, filetest, integer, less, locale, open, ops, overload, -re, sigtrap, strict, subs, utf8, vars, warnings, warnings::register +perlio, re, sigtrap, strict, subs, utf8, vars, warnings, warnings::register =item Standard Modules @@ -2512,29 +2515,30 @@ B::Bytecode, B::C, B::CC, B::Debug, B::Deparse, B::Disassembler, B::Lint, B::Showlex, B::Stackobj, B::Stash, B::Terse, B::Xref, Benchmark, ByteLoader, CGI, CGI::Apache, CGI::Carp, CGI::Cookie, CGI::Fast, CGI::Pretty, CGI::Push, CGI::Switch, CPAN, CPAN::FirstTime, CPAN::Nox, -Carp, Carp::Heavy, Class::Struct, Cwd, DB, DB_File, Devel::SelfStubber, -DirHandle, Dumpvalue, Encode, English, Env, Exporter, Exporter::Heavy, +Carp, Class::Struct, Cwd, DB, DB_File, Devel::SelfStubber, DirHandle, +Dumpvalue, Encode, English, Env, Exporter, Exporter::Heavy, ExtUtils::Command, ExtUtils::Embed, ExtUtils::Install, ExtUtils::Installed, ExtUtils::Liblist, ExtUtils::MM_Cygwin, ExtUtils::MM_OS2, ExtUtils::MM_Unix, ExtUtils::MM_VMS, ExtUtils::MM_Win32, ExtUtils::MakeMaker, ExtUtils::Manifest, ExtUtils::Mkbootstrap, ExtUtils::Mksymlists, ExtUtils::Packlist, ExtUtils::testlib, Fatal, Fcntl, File::Basename, File::CheckTree, File::Compare, File::Copy, File::DosGlob, -File::Find, File::Path, File::Spec, File::Spec::Functions, File::Spec::Mac, -File::Spec::OS2, File::Spec::Unix, File::Spec::VMS, File::Spec::Win32, -File::Temp, File::stat, FileCache, FileHandle, FindBin, Getopt::Long, -Getopt::Std, I18N::Collate, IO, IPC::Open2, IPC::Open3, Math::BigFloat, -Math::BigInt, Math::Complex, Math::Trig, NDBM_File, Net::Ping, -Net::hostent, Net::netent, Net::protoent, Net::servent, O, ODBM_File, -Opcode, Pod::Checker, Pod::Find, Pod::Html, Pod::InputObjects, Pod::LaTeX, -Pod::Man, Pod::ParseUtils, Pod::Parser, Pod::Plainer, Pod::Select, -Pod::Text, Pod::Text::Color, Pod::Text::Termcap, Pod::Usage, SDBM_File, -Safe, Search::Dict, SelectSaver, SelfLoader, Shell, Socket, Storable, -Symbol, Term::ANSIColor, Term::Cap, Term::Complete, Term::ReadLine, Test, -Test::Harness, Text::Abbrev, Text::ParseWords, Text::Soundex, Text::Wrap, -Tie::Array, Tie::Handle, Tie::Hash, Tie::RefHash, Tie::Scalar, -Tie::SubstrHash, Time::Local, Time::gmtime, Time::localtime, Time::tm, -UNIVERSAL, User::grent, User::pwent +File::Find, File::Path, File::Spec, File::Spec::Epoc, +File::Spec::Functions, File::Spec::Mac, File::Spec::OS2, File::Spec::Unix, +File::Spec::VMS, File::Spec::Win32, File::Temp, File::stat, FileCache, +FileHandle, Filter::Simple, FindBin, Getopt::Long, Getopt::Std, +I18N::Collate, IO, IPC::Open2, IPC::Open3, Math::BigFloat, Math::BigInt, +Math::Complex, Math::Trig, NDBM_File, Net::Ping, Net::hostent, Net::netent, +Net::protoent, Net::servent, O, ODBM_File, Opcode, Pod::Checker, Pod::Find, +Pod::Html, Pod::InputObjects, Pod::LaTeX, Pod::Man, Pod::ParseUtils, +Pod::Parser, Pod::Plainer, Pod::Select, Pod::Text, Pod::Text::Color, +Pod::Text::Termcap, Pod::Usage, SDBM_File, Safe, Search::Dict, SelectSaver, +SelfLoader, Shell, Socket, Storable, Symbol, Term::ANSIColor, Term::Cap, +Term::Complete, Term::ReadLine, Test, Test::Harness, Text::Abbrev, +Text::ParseWords, Text::Soundex, Text::Wrap, Tie::Array, Tie::Handle, +Tie::Hash, Tie::RefHash, Tie::Scalar, Tie::SubstrHash, Time::Local, +Time::gmtime, Time::localtime, Time::tm, UNIVERSAL, User::grent, +User::pwent =item Extension Modules @@ -2694,8 +2698,8 @@ Scheme, or Tcl? =item Where can I get a list of Larry Wall witticisms? -=item How can I convince my sysadmin/supervisor/employees to use (version -5/5.005/Perl) instead of some other language? +=item How can I convince my sysadmin/supervisor/employees to use version +5/5.005/Perl instead of some other language? =back @@ -3181,7 +3185,7 @@ file? =item What does it mean that regexes are greedy? How can I get around it? -=item How do I process each word on each line? +=item How do I process each word on each line? =item How can I print out a word-frequency or line-frequency summary? @@ -3773,7 +3777,7 @@ C, C, C =item The INPUT: Keyword -=item The IN/OUTLIST/IN_OUTLIST Keywords +=item The IN/OUTLIST/IN_OUTLIST/OUT/IN_OUT Keywords =item Variable-length Parameter Lists @@ -3929,6 +3933,8 @@ C =back +=item Examining internal data structures with the C functions + =item How multiple interpreters and concurrency are supported =over 4 @@ -4190,9 +4196,9 @@ SvNOK_off, SvNOK_on, SvNOK_only, SvNV, SvNVX, SvOK, SvOOK, SvPOK, SvPOKp, SvPOK_off, SvPOK_on, SvPOK_only, SvPOK_only_UTF8, SvPV, SvPVX, SvPV_force, SvPV_nolen, SvREFCNT, SvREFCNT_dec, SvREFCNT_inc, SvROK, SvROK_off, SvROK_on, SvRV, SvSETMAGIC, SvSetSV, SvSetSV_nosteal, SvSTASH, SvTAINT, -SvTAINTED, SvTAINTED_off, SvTAINTED_on, SvTRUE, SvTYPE, svtype, SVt_IV, -SVt_NV, SVt_PV, SVt_PVAV, SVt_PVCV, SVt_PVHV, SVt_PVMG, SvUPGRADE, SvUTF8, -SvUTF8_off, SvUTF8_on, SvUV, SvUVX, sv_2mortal, sv_bless, sv_catpv, +SvTAINTED, SvTAINTED_off, SvTAINTED_on, SvTRUE, svtype, SvTYPE, SVt_IV, +SVt_NV, SVt_PV, SVt_PVAV, SVt_PVCV, SVt_PVHV, SVt_PVMG, SvUOK, SvUPGRADE, +SvUTF8, SvUTF8_off, SvUTF8_on, SvUV, SvUVX, sv_2mortal, sv_bless, sv_catpv, sv_catpvf, sv_catpvf_mg, sv_catpvn, sv_catpvn_mg, sv_catpv_mg, sv_catsv, sv_catsv_mg, sv_chop, sv_clear, sv_cmp, sv_cmp_locale, sv_dec, sv_derived_from, sv_eq, sv_free, sv_gets, sv_grow, sv_inc, sv_insert, @@ -4202,13 +4208,14 @@ sv_rvweaken, sv_setiv, sv_setiv_mg, sv_setnv, sv_setnv_mg, sv_setpv, sv_setpvf, sv_setpvf_mg, sv_setpviv, sv_setpviv_mg, sv_setpvn, sv_setpvn_mg, sv_setpv_mg, sv_setref_iv, sv_setref_nv, sv_setref_pv, sv_setref_pvn, sv_setsv, sv_setsv_mg, sv_setuv, sv_setuv_mg, sv_true, -sv_unmagic, sv_unref, sv_upgrade, sv_usepvn, sv_usepvn_mg, +sv_unmagic, sv_unref, sv_unref_flags, sv_upgrade, sv_usepvn, sv_usepvn_mg, sv_utf8_downgrade, sv_utf8_encode, sv_utf8_upgrade, sv_vcatpvfn, -sv_vsetpvfn, THIS, toLOWER, toUPPER, U8 *s, utf8_to_bytes, utf8_to_uv, -utf8_to_uv_simple, warn, XPUSHi, XPUSHn, XPUSHp, XPUSHs, XPUSHu, XS, -XSRETURN, XSRETURN_EMPTY, XSRETURN_IV, XSRETURN_NO, XSRETURN_NV, -XSRETURN_PV, XSRETURN_UNDEF, XSRETURN_YES, XST_mIV, XST_mNO, XST_mNV, -XST_mPV, XST_mUNDEF, XST_mYES, XS_VERSION, XS_VERSION_BOOTCHECK, Zero +sv_vsetpvfn, THIS, toLOWER, toUPPER, U8 *s, utf8_distance, utf8_hop, +utf8_length, utf8_to_bytes, utf8_to_uv, utf8_to_uv_simple, warn, XPUSHi, +XPUSHn, XPUSHp, XPUSHs, XPUSHu, XS, XSRETURN, XSRETURN_EMPTY, XSRETURN_IV, +XSRETURN_NO, XSRETURN_NV, XSRETURN_PV, XSRETURN_UNDEF, XSRETURN_YES, +XST_mIV, XST_mNO, XST_mNV, XST_mPV, XST_mUNDEF, XST_mYES, XS_VERSION, +XS_VERSION_BOOTCHECK, Zero =item AUTHORS @@ -4223,7 +4230,7 @@ XST_mPV, XST_mUNDEF, XST_mYES, XS_VERSION, XS_VERSION_BOOTCHECK, Zero =item DESCRIPTION -is_gv_magical +djSP, is_gv_magical, start_glob =item AUTHORS @@ -4762,6 +4769,8 @@ accidentally using the context of the sort() itself) =item Linux With Sfio Fails op/misc Test 48 +=item sprintf tests 129 and 130 + =item Storable tests fail in some platforms =item Threads Are Still Experimental @@ -5578,7 +5587,7 @@ PERL_SH_DIR too long, Process terminated by SIG%s =back -=head2 perlamiga - Perl under Amiga OS (possibly very outdated information) +=head2 perlamiga - Perl under Amiga OS =over 4 @@ -5633,13 +5642,15 @@ finally close()d =item Making +sh Configure -Dprefix=/ade -Dloclibpth=/ade/lib + =item Testing =item Installing the built perl =back -=item AUTHOR +=item AUTHORS =item SEE ALSO @@ -7893,9 +7904,36 @@ distribution, Signals =item Programmer's interface -expand($type,@things), Programming Examples - -=item Methods in the four Classes +expand($type,@things), expandany(@things), Programming Examples + +=item Methods in the other Classes + +CPAN::Author::as_glimpse(), CPAN::Author::as_string(), +CPAN::Author::email(), CPAN::Author::fullname(), CPAN::Author::name(), +CPAN::Bundle::as_glimpse(), CPAN::Bundle::as_string(), +CPAN::Bundle::clean(), CPAN::Bundle::contains(), +CPAN::Bundle::force($method,@args), CPAN::Bundle::get(), +CPAN::Bundle::inst_file(), CPAN::Bundle::inst_version(), +CPAN::Bundle::uptodate(), CPAN::Bundle::install(), CPAN::Bundle::make(), +CPAN::Bundle::readme(), CPAN::Bundle::test(), +CPAN::Distribution::as_glimpse(), CPAN::Distribution::as_string(), +CPAN::Distribution::clean(), CPAN::Distribution::containsmods(), +CPAN::Distribution::cvs_import(), CPAN::Distribution::dir(), +CPAN::Distribution::force($method,@args), CPAN::Distribution::get(), +CPAN::Distribution::install(), CPAN::Distribution::isa_perl(), +CPAN::Distribution::look(), CPAN::Distribution::make(), +CPAN::Distribution::prereq_pm(), CPAN::Distribution::readme(), +CPAN::Distribution::test(), CPAN::Distribution::uptodate(), +CPAN::Index::force_reload(), CPAN::Index::reload(), CPAN::InfoObj::dump(), +CPAN::Module::as_glimpse(), CPAN::Module::as_string(), +CPAN::Module::clean(), CPAN::Module::cpan_file(), +CPAN::Module::cpan_version(), CPAN::Module::cvs_import(), +CPAN::Module::description(), CPAN::Module::force($method,@args), +CPAN::Module::get(), CPAN::Module::inst_file(), +CPAN::Module::inst_version(), CPAN::Module::install(), +CPAN::Module::look(), CPAN::Module::make(), +CPAN::Module::manpage_headline(), CPAN::Module::readme(), +CPAN::Module::test(), CPAN::Module::uptodate(), CPAN::Module::userid() =item Cache Manager @@ -8010,16 +8048,6 @@ module =back -=head2 Carp::Heavy - Carp guts - -=over 4 - -=item SYNOPIS - -=item DESCRIPTION - -=back - =head2 Class::Struct - declare struct-like datatypes as Perl classes =over 4 @@ -8157,17 +8185,17 @@ C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, -C, C, C, C, C, -C, C, C, C, C, -C, C, C, C, C, -C, C, C, C, C, -C, C, C, C, C, -C, C, C, C, C, -C, C, C, C, -C, C, C, C, C, -C, C, C, C, C, -C, C, C, C, C, -C +C, C, C, C, C, +C, C, C, C, C, +C, C, C, C, C, +C, C, C, C, C, +C, C, C, C, C, +C, C, C, C, +C, C, C, C, C, +C, C, C, C, C, +C, C, C, C, C, +C, C, C, C, C, +C, C =item e @@ -8240,10 +8268,10 @@ C, C, C, C, C, C =item n -C, C, C, C, -C, C, C, C, C, C, -C, C, C, C, C, -C, C, C +C, C, C, C, +C, C, C, C, C, +C, C, C, C, C, +C, C, C, C, C =item o @@ -8729,6 +8757,35 @@ Perl code =back +=head2 Encode::EncodeFormat, EncodeFormat - the format of encoding tables +of the Encode extension + +=over 4 + +=item DESCRIPTION + +[1] B, [2] B, [3] B, [4] B + +=item KEYWORDS + +=item COPYRIGHT + +=back + +=head2 EncodeFormat - the format of encoding tables of the Encode extension + +=over 4 + +=item DESCRIPTION + +[1] B, [2] B, [3] B, [4] B + +=item KEYWORDS + +=item COPYRIGHT + +=back + =head2 English - use nice English (or awk) names for ugly punctuation variables @@ -9428,6 +9485,10 @@ PERL_MM_OPT C I, C I, C I<$!>, C I +=item ENVIRONMENT + +B + =item SEE ALSO =item AUTHOR @@ -10002,6 +10063,12 @@ TopSystemUID =item WARNING +=over 4 + +=item Temporary files and NFS + +=back + =item HISTORY =item SEE ALSO @@ -10050,6 +10117,68 @@ $fh->print, $fh->printf, $fh->getline, $fh->getlines =back +=head2 Filter::Simple - Simplified source filtering + +=over 4 + +=item SYNOPSIS + +=item DESCRIPTION + +=over 4 + +=item The Problem + +=item A Solution + +=item How it works + +=back + +=item AUTHOR + +=item COPYRIGHT + +=back + +=head2 Filter::Util::Call - Perl Source Filter Utility Module + +=over 4 + +=item DESCRIPTION + +=over 4 + +=item B + +=item B + +=item B + +B<$_>, B<$status>, B and B, B + +=back + +=item EXAMPLES + +=over 4 + +=item Example 1: A simple filter. + +=item Example 2: Using the context + +=item Example 3: Using the context within the filter + +=item Example 4: Using filter_del + +=back + +=item AUTHOR + +=item DATE + +=back + =head2 FindBin - Locate directory of original perl script =over 4 @@ -12766,7 +12895,7 @@ NORMAL TESTS, SKIPPED TESTS, TODO TESTS C, C, C, C, C +%s>, C =item ENVIRONMENT diff --git a/pp_ctl.c b/pp_ctl.c index d079e4a..aff5815 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -157,7 +157,7 @@ PP(pp_substcont) register char *m = cx->sb_m; char *orig = cx->sb_orig; register REGEXP *rx = cx->sb_rx; - + rxres_restore(&cx->sb_rxres, rx); if (cx->sb_iters++) { @@ -176,8 +176,8 @@ PP(pp_substcont) : (REXEC_COPY_STR|REXEC_IGNOREPOS|REXEC_NOT_FIRST)))) { SV *targ = cx->sb_targ; - sv_catpvn(dstr, s, cx->sb_strend - s); + sv_catpvn(dstr, s, cx->sb_strend - s); cx->sb_rxtainted |= RX_MATCH_TAINTED(rx); (void)SvOOK_off(targ); @@ -189,9 +189,11 @@ PP(pp_substcont) sv_free(dstr); TAINT_IF(cx->sb_rxtainted & 1); + if (pm->op_pmdynflags & PMdf_UTF8) + SvUTF8_on(targ); /* could also copy SvUTF8(dstr)? */ PUSHs(sv_2mortal(newSViv((I32)cx->sb_iters - 1))); - (void)SvPOK_only(targ); + (void)SvPOK_only_UTF8(targ); TAINT_IF(cx->sb_rxtainted); SvSETMAGIC(targ); SvTAINT(targ); @@ -209,7 +211,8 @@ PP(pp_substcont) cx->sb_strend = s + (cx->sb_strend - m); } cx->sb_m = m = rx->startp[0] + orig; - sv_catpvn(dstr, s, m-s); + if (m > s) + sv_catpvn(dstr, s, m-s); cx->sb_s = rx->endp[0] + orig; { /* Update the pos() information. */ SV *sv = cx->sb_targ; diff --git a/pp_hot.c b/pp_hot.c index 6a5b96f..2904d9f 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1179,6 +1179,7 @@ PP(pp_match) TARG = DEFSV; EXTEND(SP,1); } + PL_reg_sv = TARG; PUTBACK; /* EVAL blocks need stack_sp. */ s = SvPV(TARG, len); strend = s + len; @@ -1268,27 +1269,25 @@ play_it_again: RX_MATCH_TAINTED_on(rx); TAINT_IF(RX_MATCH_TAINTED(rx)); if (gimme == G_ARRAY) { - I32 iters, i, len; + I32 nparens, i, len; - iters = rx->nparens; - if (global && !iters) + nparens = rx->nparens; + if (global && !nparens) i = 1; else i = 0; SPAGAIN; /* EVAL blocks could move the stack. */ - EXTEND(SP, iters + i); - EXTEND_MORTAL(iters + i); - for (i = !i; i <= iters; i++) { + EXTEND(SP, nparens + i); + EXTEND_MORTAL(nparens + i); + for (i = !i; i <= nparens; i++) { PUSHs(sv_newmortal()); /*SUPPRESS 560*/ if ((rx->startp[i] != -1) && rx->endp[i] != -1 ) { len = rx->endp[i] - rx->startp[i]; s = rx->startp[i] + truebase; sv_setpvn(*SP, s, len); - if ((pm->op_pmdynflags & PMdf_UTF8) && !IN_BYTE) { + if (DO_UTF8(TARG)) SvUTF8_on(*SP); - sv_utf8_downgrade(*SP, TRUE); - } } } if (global) { @@ -1298,7 +1297,7 @@ play_it_again: r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST; goto play_it_again; } - else if (!iters) + else if (!nparens) XPUSHs(&PL_sv_yes); LEAVE_SCOPE(oldsave); RETURN; @@ -1831,6 +1830,7 @@ PP(pp_subst) TARG = DEFSV; EXTEND(SP,1); } + PL_reg_sv = TARG; if (SvFAKE(TARG) && SvREADONLY(TARG)) sv_force_normal(TARG); if (SvREADONLY(TARG) @@ -1847,7 +1847,7 @@ PP(pp_subst) if (PL_tainted) rxtainted |= 2; TAINT_NOT; - + force_it: if (!pm || !s) DIE(aTHX_ "panic: do_subst"); @@ -2004,6 +2004,8 @@ PP(pp_subst) rxtainted |= RX_MATCH_TAINTED(rx); dstr = NEWSV(25, len); sv_setpvn(dstr, m, s-m); + if (DO_UTF8(TARG)) + SvUTF8_on(dstr); PL_curpm = pm; if (!c) { register PERL_CONTEXT *cx; @@ -2030,7 +2032,8 @@ PP(pp_subst) sv_catpvn(dstr, c, clen); if (once) break; - } while (CALLREGEXEC(aTHX_ rx, s, strend, orig, s == m, TARG, NULL, r_flags)); + } while (CALLREGEXEC(aTHX_ rx, s, strend, orig, s == m, + TARG, NULL, r_flags)); sv_catpvn(dstr, s, strend - s); (void)SvOOK_off(TARG); diff --git a/pp_sys.c b/pp_sys.c index fd44fd3..b1ec92c 100644 --- a/pp_sys.c +++ b/pp_sys.c @@ -645,8 +645,15 @@ PP(pp_fileno) RETURN; } - if (!gv || !(io = GvIO(gv)) || !(fp = IoIFP(io))) + if (!gv || !(io = GvIO(gv)) || !(fp = IoIFP(io))) { + /* Can't do this because people seem to do things like + defined(fileno($foo)) to check whether $foo is a valid fh. + if (ckWARN2(WARN_UNOPENED,WARN_CLOSED)) + report_evil_fh(gv, io, PL_op->op_type); + */ RETPUSHUNDEF; + } + PUSHi(PerlIO_fileno(fp)); RETURN; } @@ -709,8 +716,11 @@ PP(pp_binmode) } EXTEND(SP, 1); - if (!(io = GvIO(gv)) || !(fp = IoIFP(io))) - RETPUSHUNDEF; + if (!(io = GvIO(gv)) || !(fp = IoIFP(io))) { + if (ckWARN2(WARN_UNOPENED,WARN_CLOSED)) + report_evil_fh(gv, io, PL_op->op_type); + RETPUSHUNDEF; + } if (discp) { names = SvPV(discp,len); @@ -2050,9 +2060,11 @@ PP(pp_ioctl) char *s; IV retval; GV *gv = (GV*)POPs; - IO *io = GvIOn(gv); + IO *io = gv ? GvIOn(gv) : 0; if (!io || !argsv || !IoIFP(io)) { + if (ckWARN2(WARN_UNOPENED,WARN_CLOSED)) + report_evil_fh(gv, io, PL_op->op_type); SETERRNO(EBADF,RMS$_IFI); /* well, sort of... */ RETPUSHUNDEF; } @@ -2164,16 +2176,17 @@ PP(pp_socket) int fd; gv = (GV*)POPs; + io = gv ? GvIOn(gv) : NULL; - if (!gv) { + if (!gv || !io) { + if (ckWARN2(WARN_UNOPENED,WARN_CLOSED)) + report_evil_fh(gv, io, PL_op->op_type); + if (IoIFP(io)) + do_close(gv, FALSE); SETERRNO(EBADF,LIB$_INVARG); RETPUSHUNDEF; } - io = GvIOn(gv); - if (IoIFP(io)) - do_close(gv, FALSE); - TAINT_PROPER("socket"); fd = PerlSock_socket(domain, type, protocol); if (fd < 0) @@ -2212,15 +2225,21 @@ PP(pp_sockpair) gv2 = (GV*)POPs; gv1 = (GV*)POPs; - if (!gv1 || !gv2) + io1 = gv1 ? GvIOn(gv1) : NULL; + io2 = gv2 ? GvIOn(gv2) : NULL; + if (!gv1 || !gv2 || !io1 || !io2) { + if (ckWARN2(WARN_UNOPENED,WARN_CLOSED)) { + if (!gv1 || !io1) + report_evil_fh(gv1, io1, PL_op->op_type); + if (!gv2 || !io2) + report_evil_fh(gv1, io2, PL_op->op_type); + } + if (IoIFP(io1)) + do_close(gv1, FALSE); + if (IoIFP(io2)) + do_close(gv2, FALSE); RETPUSHUNDEF; - - io1 = GvIOn(gv1); - io2 = GvIOn(gv2); - if (IoIFP(io1)) - do_close(gv1, FALSE); - if (IoIFP(io2)) - do_close(gv2, FALSE); + } TAINT_PROPER("socketpair"); if (PerlSock_socketpair(domain, type, protocol, fd) < 0) @@ -2346,9 +2365,9 @@ PP(pp_listen) #ifdef HAS_SOCKET int backlog = POPi; GV *gv = (GV*)POPs; - register IO *io = GvIOn(gv); + register IO *io = gv ? GvIOn(gv) : NULL; - if (!io || !IoIFP(io)) + if (!gv || !io || !IoIFP(io)) goto nuts; if (PerlSock_listen(PerlIO_fileno(IoIFP(io)), backlog) >= 0) diff --git a/proto.h b/proto.h index 4fc260e..1bcb5cd 100644 --- a/proto.h +++ b/proto.h @@ -616,6 +616,7 @@ PERL_CALLCONV void Perl_push_scope(pTHX); PERL_CALLCONV OP* Perl_ref(pTHX_ OP* o, I32 type); PERL_CALLCONV OP* Perl_refkids(pTHX_ OP* o, I32 type); PERL_CALLCONV void Perl_regdump(pTHX_ regexp* r); +PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ struct regnode *n, bool doinit, SV **initsvp); PERL_CALLCONV I32 Perl_pregexec(pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave); PERL_CALLCONV void Perl_pregfree(pTHX_ struct regexp* r); PERL_CALLCONV regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, PMOP* pm); @@ -1111,7 +1112,6 @@ STATIC regnode* S_regatom(pTHX_ struct RExC_state_t*, I32 *); STATIC regnode* S_regbranch(pTHX_ struct RExC_state_t*, I32 *, I32); STATIC void S_reguni(pTHX_ struct RExC_state_t*, UV, char *, STRLEN*); STATIC regnode* S_regclass(pTHX_ struct RExC_state_t*); -STATIC regnode* S_regclassutf8(pTHX_ struct RExC_state_t*); STATIC I32 S_regcurly(pTHX_ char *); STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t*, U8); STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t*, I32 *); @@ -1141,8 +1141,7 @@ STATIC I32 S_regmatch(pTHX_ regnode *prog); STATIC I32 S_regrepeat(pTHX_ regnode *p, I32 max); STATIC I32 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp); STATIC I32 S_regtry(pTHX_ regexp *prog, char *startpos); -STATIC bool S_reginclass(pTHX_ regnode *p, I32 c); -STATIC bool S_reginclassutf8(pTHX_ regnode *f, U8* p); +STATIC bool S_reginclass(pTHX_ regnode *n, U8 *p, bool do_utf8sv_is_utf8); STATIC CHECKPOINT S_regcppush(pTHX_ I32 parenfloor); STATIC char* S_regcppop(pTHX); STATIC char* S_regcp_set_to(pTHX_ I32 ss); diff --git a/regcomp.c b/regcomp.c index aae2ced..8748271 100644 --- a/regcomp.c +++ b/regcomp.c @@ -118,7 +118,7 @@ typedef struct RExC_state_t { char *end; /* End of input for compile */ char *parse; /* Input-scan pointer. */ I32 whilem_seen; /* number of WHILEM in this expr */ - regnode *emit; /* Code-emit pointer; ®dummy = don't */ + regnode *emit; /* Code-emit pointer; ®dummy = don't = compiling */ I32 naughty; /* How bad is this pattern? */ I32 sawback; /* Did we see \1, ...? */ U32 seen; @@ -234,8 +234,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, #define LOC (RExC_flags16 & PMf_LOCALE) #define FOLD (RExC_flags16 & PMf_FOLD) -#define OOB_CHAR8 1234 -#define OOB_UTF8 123456 +#define OOB_UNICODE 12345678 #define OOB_NAMEDCLASS -1 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv)) @@ -1196,7 +1195,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg break; } } - else if (strchr((char*)PL_simple,OP(scan)) || PL_regkind[(U8)OP(scan)] == ANYUTF8) { + else if (strchr((char*)PL_simple,OP(scan))) { int value; if (flags & SCF_DO_SUBSTR) { @@ -1210,20 +1209,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg /* Some of the logic below assumes that switching locale on will only add false positives. */ switch (PL_regkind[(U8)OP(scan)]) { - case ANYUTF8: case SANY: - case SANYUTF8: - case ALNUMUTF8: - case ANYOFUTF8: - case ALNUMLUTF8: - case NALNUMUTF8: - case NALNUMLUTF8: - case SPACEUTF8: - case NSPACEUTF8: - case SPACELUTF8: - case NSPACELUTF8: - case DIGITUTF8: - case NDIGITUTF8: default: do_default: /* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */ @@ -1750,7 +1736,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) /* turn .* into ^.* with an implied $*=1 */ int type = OP(NEXTOPER(first)); - if (type == REG_ANY || type == ANYUTF8) + if (type == REG_ANY) type = ROPT_ANCH_MBOL; else type = ROPT_ANCH_SBOL; @@ -1850,8 +1836,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) longest_fixed_length = 0; } if (r->regstclass - && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == ANYUTF8 - || OP(r->regstclass) == SANYUTF8 || OP(r->regstclass) == SANY)) + && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == SANY)) r->regstclass = NULL; if ((!r->anchored_substr || r->anchored_offset) && stclass_flag && !(data.start_class->flags & ANYOF_EOS) @@ -1866,6 +1851,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) struct regnode_charclass_class); r->regstclass = (regnode*)RExC_rx->data->data[n]; r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ + PL_regdata = r->data; /* for regprop() */ DEBUG_r((sv = sv_newmortal(), regprop(sv, (regnode*)data.start_class), PerlIO_printf(Perl_debug_log, "synthetic stclass `%s'.\n", @@ -1933,7 +1919,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->reganch |= ROPT_EVAL_SEEN; Newz(1002, r->startp, RExC_npar, I32); Newz(1002, r->endp, RExC_npar, I32); - PL_regdata = r->data; /* for regprop() ANYOFUTF8 */ + PL_regdata = r->data; /* for regprop() */ DEBUG_r(regdump(r)); return(r); } @@ -2556,26 +2542,17 @@ tryagain: break; case '.': nextchar(pRExC_state); - if (UTF) { - if (RExC_flags16 & PMf_SINGLELINE) - ret = reg_node(pRExC_state, SANYUTF8); - else - ret = reg_node(pRExC_state, ANYUTF8); - *flagp |= HASWIDTH; - } - else { - if (RExC_flags16 & PMf_SINGLELINE) - ret = reg_node(pRExC_state, SANY); - else - ret = reg_node(pRExC_state, REG_ANY); - *flagp |= HASWIDTH|SIMPLE; - } + if (RExC_flags16 & PMf_SINGLELINE) + ret = reg_node(pRExC_state, SANY); + else + ret = reg_node(pRExC_state, REG_ANY); + *flagp |= HASWIDTH|SIMPLE; RExC_naughty++; break; case '[': { char *oregcomp_parse = ++RExC_parse; - ret = (UTF ? regclassutf8(pRExC_state) : regclass(pRExC_state)); + ret = regclass(pRExC_state); if (*RExC_parse != ']') { RExC_parse = oregcomp_parse; vFAIL("Unmatched ["); @@ -2659,20 +2636,14 @@ tryagain: is_utf8_mark((U8*)"~"); /* preload table */ break; case 'w': - ret = reg_node(pRExC_state, - UTF - ? (LOC ? ALNUMLUTF8 : ALNUMUTF8) - : (LOC ? ALNUML : ALNUM)); + ret = reg_node(pRExC_state, LOC ? ALNUML : ALNUM); *flagp |= HASWIDTH|SIMPLE; nextchar(pRExC_state); if (UTF && !PL_utf8_alnum) is_utf8_alnum((U8*)"a"); /* preload table */ break; case 'W': - ret = reg_node(pRExC_state, - UTF - ? (LOC ? NALNUMLUTF8 : NALNUMUTF8) - : (LOC ? NALNUML : NALNUM)); + ret = reg_node(pRExC_state, LOC ? NALNUML : NALNUM); *flagp |= HASWIDTH|SIMPLE; nextchar(pRExC_state); if (UTF && !PL_utf8_alnum) @@ -2681,10 +2652,7 @@ tryagain: case 'b': RExC_seen_zerolen++; RExC_seen |= REG_SEEN_LOOKBEHIND; - ret = reg_node(pRExC_state, - UTF - ? (LOC ? BOUNDLUTF8 : BOUNDUTF8) - : (LOC ? BOUNDL : BOUND)); + ret = reg_node(pRExC_state, LOC ? BOUNDL : BOUND); *flagp |= SIMPLE; nextchar(pRExC_state); if (UTF && !PL_utf8_alnum) @@ -2693,44 +2661,35 @@ tryagain: case 'B': RExC_seen_zerolen++; RExC_seen |= REG_SEEN_LOOKBEHIND; - ret = reg_node(pRExC_state, - UTF - ? (LOC ? NBOUNDLUTF8 : NBOUNDUTF8) - : (LOC ? NBOUNDL : NBOUND)); + ret = reg_node(pRExC_state, LOC ? NBOUNDL : NBOUND); *flagp |= SIMPLE; nextchar(pRExC_state); if (UTF && !PL_utf8_alnum) is_utf8_alnum((U8*)"a"); /* preload table */ break; case 's': - ret = reg_node(pRExC_state, - UTF - ? (LOC ? SPACELUTF8 : SPACEUTF8) - : (LOC ? SPACEL : SPACE)); + ret = reg_node(pRExC_state, LOC ? SPACEL : SPACE); *flagp |= HASWIDTH|SIMPLE; nextchar(pRExC_state); if (UTF && !PL_utf8_space) is_utf8_space((U8*)" "); /* preload table */ break; case 'S': - ret = reg_node(pRExC_state, - UTF - ? (LOC ? NSPACELUTF8 : NSPACEUTF8) - : (LOC ? NSPACEL : NSPACE)); + ret = reg_node(pRExC_state, LOC ? NSPACEL : NSPACE); *flagp |= HASWIDTH|SIMPLE; nextchar(pRExC_state); if (UTF && !PL_utf8_space) is_utf8_space((U8*)" "); /* preload table */ break; case 'd': - ret = reg_node(pRExC_state, UTF ? DIGITUTF8 : DIGIT); + ret = reg_node(pRExC_state, DIGIT); *flagp |= HASWIDTH|SIMPLE; nextchar(pRExC_state); if (UTF && !PL_utf8_digit) is_utf8_digit((U8*)"1"); /* preload table */ break; case 'D': - ret = reg_node(pRExC_state, UTF ? NDIGITUTF8 : NDIGIT); + ret = reg_node(pRExC_state, NDIGIT); *flagp |= HASWIDTH|SIMPLE; nextchar(pRExC_state); if (UTF && !PL_utf8_digit) @@ -2754,7 +2713,7 @@ tryagain: RExC_end = RExC_parse + 2; RExC_parse--; - ret = regclassutf8(pRExC_state); + ret = regclass(pRExC_state); RExC_end = oldregxend; RExC_parse--; @@ -3194,58 +3153,110 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state) STATIC regnode * S_regclass(pTHX_ RExC_state_t *pRExC_state) { - register U32 value; - register I32 lastvalue = OOB_CHAR8; - register I32 range = 0; + register UV value; + register IV lastvalue = OOB_UNICODE; + register IV range = 0; register regnode *ret; STRLEN numlen; - I32 namedclass; + IV namedclass; char *rangebegin; bool need_class = 0; + SV *listsv; + register char *e; + UV n; + + ret = reganode(pRExC_state, ANYOF, 0); + + if (!SIZE_ONLY) + ANYOF_FLAGS(ret) = 0; + + if (*RExC_parse == '^') { /* Complement of range. */ + RExC_naughty++; + RExC_parse++; + if (!SIZE_ONLY) + ANYOF_FLAGS(ret) |= ANYOF_INVERT; + } - ret = reg_node(pRExC_state, ANYOF); if (SIZE_ONLY) RExC_size += ANYOF_SKIP; else { - ret->flags = 0; - ANYOF_BITMAP_ZERO(ret); RExC_emit += ANYOF_SKIP; if (FOLD) ANYOF_FLAGS(ret) |= ANYOF_FOLD; if (LOC) ANYOF_FLAGS(ret) |= ANYOF_LOCALE; - } - if (*RExC_parse == '^') { /* Complement of range. */ - RExC_naughty++; - RExC_parse++; - if (!SIZE_ONLY) - ANYOF_FLAGS(ret) |= ANYOF_INVERT; + ANYOF_BITMAP_ZERO(ret); + listsv = newSVpvn("# comment\n", 10); } if (!SIZE_ONLY && ckWARN(WARN_REGEXP)) checkposixcc(pRExC_state); if (*RExC_parse == ']' || *RExC_parse == '-') - goto skipcond; /* allow 1st char to be ] or - */ + goto charclassloop; /* allow 1st char to be ] or - */ + while (RExC_parse < RExC_end && *RExC_parse != ']') { - skipcond: - namedclass = OOB_NAMEDCLASS; + + charclassloop: + + namedclass = OOB_NAMEDCLASS; /* initialize as illegal */ + if (!range) rangebegin = RExC_parse; - value = UCHARAT(RExC_parse++); + if (UTF) { + value = utf8_to_uv((U8*)RExC_parse, + RExC_end - RExC_parse, + &numlen, 0); + RExC_parse += numlen; + } + else + value = UCHARAT(RExC_parse++); if (value == '[') namedclass = regpposixcc(pRExC_state, value); else if (value == '\\') { - value = UCHARAT(RExC_parse++); + if (UTF) { + value = utf8_to_uv((U8*)RExC_parse, + RExC_end - RExC_parse, + &numlen, 0); + RExC_parse += numlen; + } + else + value = UCHARAT(RExC_parse++); /* Some compilers cannot handle switching on 64-bit integer - * values, therefore the 'value' cannot be an UV. --jhi */ - switch (value) { + * values, therefore value cannot be an UV. Yes, this will + * be a problem later if we want switch on Unicode. + * A similar issue a little bit later when switching on + * namedclass. --jhi */ + switch ((I32)value) { case 'w': namedclass = ANYOF_ALNUM; break; case 'W': namedclass = ANYOF_NALNUM; break; case 's': namedclass = ANYOF_SPACE; break; case 'S': namedclass = ANYOF_NSPACE; break; case 'd': namedclass = ANYOF_DIGIT; break; case 'D': namedclass = ANYOF_NDIGIT; break; + case 'p': + case 'P': + if (*RExC_parse == '{') { + e = strchr(RExC_parse++, '}'); + if (!e) + vFAIL("Missing right brace on \\p{}"); + n = e - RExC_parse; + } + else { + e = RExC_parse; + n = 1; + } + if (!SIZE_ONLY) { + if (value == 'p') + Perl_sv_catpvf(aTHX_ listsv, + "+utf8::%.*s\n", (int)n, RExC_parse); + else + Perl_sv_catpvf(aTHX_ listsv, + "!utf8::%.*s\n", (int)n, RExC_parse); + } + RExC_parse = e + 1; + ANYOF_FLAGS(ret) |= ANYOF_UNICODE; + continue; case 'n': value = '\n'; break; case 'r': value = '\r'; break; case 't': value = '\t'; break; @@ -3259,9 +3270,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) case 'a': value = '\057'; break; #endif case 'x': - numlen = 0; /* disallow underscores */ - value = (UV)scan_hex(RExC_parse, 2, &numlen); - RExC_parse += numlen; + if (*RExC_parse == '{') { + e = strchr(RExC_parse++, '}'); + if (!e) + vFAIL("Missing right brace on \\x{}"); + numlen = 1; /* allow underscores */ + value = (UV)scan_hex(RExC_parse, + e - RExC_parse, + &numlen); + RExC_parse = e + 1; + } + else { + numlen = 0; /* disallow underscores */ + value = (UV)scan_hex(RExC_parse, 2, &numlen); + RExC_parse += numlen; + } break; case 'c': value = UCHARAT(RExC_parse++); @@ -3275,16 +3298,22 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) break; default: if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value)) - - vWARN2(RExC_parse, "Unrecognized escape \\%c in character class passed through", (int)value); + vWARN2(RExC_parse, + "Unrecognized escape \\%c in character class passed through", + (int)value); break; } - } - if (namedclass > OOB_NAMEDCLASS) { - if (!need_class && !SIZE_ONLY) + } /* end of \blah */ + + if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */ + + if (!SIZE_ONLY && !need_class) ANYOF_CLASS_ZERO(ret); + need_class = 1; - if (range) { /* a-\d, a-[:digit:] */ + + /* a bad range like a-\d, a-[:digit:] ? */ + if (range) { if (!SIZE_ONLY) { if (ckWARN(WARN_REGEXP)) vWARN4(RExC_parse, @@ -3292,13 +3321,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) RExC_parse - rangebegin, RExC_parse - rangebegin, rangebegin); - ANYOF_BITMAP_SET(ret, lastvalue); - ANYOF_BITMAP_SET(ret, '-'); + if (lastvalue < 256) { + ANYOF_BITMAP_SET(ret, lastvalue); + ANYOF_BITMAP_SET(ret, '-'); + } + else { + ANYOF_FLAGS(ret) |= ANYOF_UNICODE; + Perl_sv_catpvf(aTHX_ listsv, + /* 0x002D is Unicode for '-' */ + "%04"UVxf"\n002D\n", (UV)lastvalue); + } } - range = 0; /* this is not a true range */ + + range = 0; /* this was not a true range */ } + if (!SIZE_ONLY) { - switch (namedclass) { + /* Possible truncation here but in some 64-bit environments + * the compiler gets heartburn about switch on 64-bit values. + * A similar issue a little earlier when switching on value. + * --jhi */ + switch ((I32)namedclass) { case ANYOF_ALNUM: if (LOC) ANYOF_CLASS_SET(ret, ANYOF_ALNUM); @@ -3307,6 +3350,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isALNUM(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; case ANYOF_NALNUM: if (LOC) @@ -3316,42 +3360,17 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isALNUM(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break; - case ANYOF_SPACE: - if (LOC) - ANYOF_CLASS_SET(ret, ANYOF_SPACE); - else { - for (value = 0; value < 256; value++) - if (isSPACE(value)) - ANYOF_BITMAP_SET(ret, value); - } - break; - case ANYOF_NSPACE: + case ANYOF_ALNUMC: if (LOC) - ANYOF_CLASS_SET(ret, ANYOF_NSPACE); + ANYOF_CLASS_SET(ret, ANYOF_ALNUMC); else { for (value = 0; value < 256; value++) - if (!isSPACE(value)) + if (isALNUMC(value)) ANYOF_BITMAP_SET(ret, value); } - break; - case ANYOF_DIGIT: - if (LOC) - ANYOF_CLASS_SET(ret, ANYOF_DIGIT); - else { - for (value = '0'; value <= '9'; value++) - ANYOF_BITMAP_SET(ret, value); - } - break; - case ANYOF_NDIGIT: - if (LOC) - ANYOF_CLASS_SET(ret, ANYOF_NDIGIT); - else { - for (value = 0; value < '0'; value++) - ANYOF_BITMAP_SET(ret, value); - for (value = '9' + 1; value < 256; value++) - ANYOF_BITMAP_SET(ret, value); - } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break; case ANYOF_NALNUMC: if (LOC) @@ -3361,15 +3380,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isALNUMC(value)) ANYOF_BITMAP_SET(ret, value); } - break; - case ANYOF_ALNUMC: - if (LOC) - ANYOF_CLASS_SET(ret, ANYOF_ALNUMC); - else { - for (value = 0; value < 256; value++) - if (isALNUMC(value)) - ANYOF_BITMAP_SET(ret, value); - } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break; case ANYOF_ALPHA: if (LOC) @@ -3379,6 +3390,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isALPHA(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break; case ANYOF_NALPHA: if (LOC) @@ -3388,6 +3400,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isALPHA(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break; case ANYOF_ASCII: if (LOC) @@ -3402,6 +3415,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) ANYOF_BITMAP_SET(ret, value); #endif /* EBCDIC */ } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break; case ANYOF_NASCII: if (LOC) @@ -3416,6 +3430,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) ANYOF_BITMAP_SET(ret, value); #endif /* EBCDIC */ } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break; case ANYOF_BLANK: if (LOC) @@ -3425,6 +3440,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isBLANK(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n"); break; case ANYOF_NBLANK: if (LOC) @@ -3434,6 +3450,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isBLANK(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n"); break; case ANYOF_CNTRL: if (LOC) @@ -3443,7 +3460,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isCNTRL(value)) ANYOF_BITMAP_SET(ret, value); } - lastvalue = OOB_CHAR8; + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break; case ANYOF_NCNTRL: if (LOC) @@ -3453,6 +3470,29 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isCNTRL(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); + break; + case ANYOF_DIGIT: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_DIGIT); + else { + /* consecutive digits assumed */ + for (value = '0'; value <= '9'; value++) + ANYOF_BITMAP_SET(ret, value); + } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); + break; + case ANYOF_NDIGIT: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_NDIGIT); + else { + /* consecutive digits assumed */ + for (value = 0; value < '0'; value++) + ANYOF_BITMAP_SET(ret, value); + for (value = '9' + 1; value < 256; value++) + ANYOF_BITMAP_SET(ret, value); + } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break; case ANYOF_GRAPH: if (LOC) @@ -3462,6 +3502,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isGRAPH(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break; case ANYOF_NGRAPH: if (LOC) @@ -3471,6 +3512,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isGRAPH(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break; case ANYOF_LOWER: if (LOC) @@ -3480,6 +3522,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isLOWER(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break; case ANYOF_NLOWER: if (LOC) @@ -3489,6 +3532,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isLOWER(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break; case ANYOF_PRINT: if (LOC) @@ -3498,6 +3542,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isPRINT(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break; case ANYOF_NPRINT: if (LOC) @@ -3507,6 +3552,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isPRINT(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break; case ANYOF_PSXSPC: if (LOC) @@ -3516,6 +3562,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isPSXSPC(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; case ANYOF_NPSXSPC: if (LOC) @@ -3525,6 +3572,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isPSXSPC(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; case ANYOF_PUNCT: if (LOC) @@ -3534,6 +3582,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isPUNCT(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break; case ANYOF_NPUNCT: if (LOC) @@ -3543,6 +3592,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isPUNCT(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); + break; + case ANYOF_SPACE: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_SPACE); + else { + for (value = 0; value < 256; value++) + if (isSPACE(value)) + ANYOF_BITMAP_SET(ret, value); + } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n"); + break; + case ANYOF_NSPACE: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_NSPACE); + else { + for (value = 0; value < 256; value++) + if (!isSPACE(value)) + ANYOF_BITMAP_SET(ret, value); + } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n"); break; case ANYOF_UPPER: if (LOC) @@ -3552,6 +3622,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isUPPER(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; case ANYOF_NUPPER: if (LOC) @@ -3561,6 +3632,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isUPPER(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break; case ANYOF_XDIGIT: if (LOC) @@ -3570,6 +3642,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isXDIGIT(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break; case ANYOF_NXDIGIT: if (LOC) @@ -3579,6 +3652,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isXDIGIT(value)) ANYOF_BITMAP_SET(ret, value); } + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break; default: vFAIL("Invalid [::] class"); @@ -3588,7 +3662,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) ANYOF_FLAGS(ret) |= ANYOF_CLASS; continue; } - } + } /* end of namedclass \blah */ + if (range) { if (lastvalue > value) /* b-a */ { Simple_vFAIL4("Invalid [] range \"%*.*s\"", @@ -3596,14 +3671,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) RExC_parse - rangebegin, rangebegin); } - range = 0; + range = 0; /* not a true range */ } else { - lastvalue = value; + lastvalue = value; /* save the beginning of the range */ if (*RExC_parse == '-' && RExC_parse+1 < RExC_end && RExC_parse[1] != ']') { RExC_parse++; - if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ + + /* a bad range like \w-, [:word:]- ? */ + if (namedclass > OOB_NAMEDCLASS) { if (ckWARN(WARN_REGEXP)) vWARN4(RExC_parse, "False [] range \"%*.*s\"", @@ -3613,325 +3690,89 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!SIZE_ONLY) ANYOF_BITMAP_SET(ret, '-'); } else - range = 1; - continue; /* do it next time */ + range = 1; /* yeah, it's a range! */ + continue; /* but do it the next time */ } } + /* now is the next time */ if (!SIZE_ONLY) { + if (lastvalue < 256 && value < 256) { #ifndef ASCIIish /* EBCDIC, for example. */ - if ((isLOWER(lastvalue) && isLOWER(value)) || - (isUPPER(lastvalue) && isUPPER(value))) - { - I32 i; - if (isLOWER(lastvalue)) { - for (i = lastvalue; i <= value; i++) - if (isLOWER(i)) - ANYOF_BITMAP_SET(ret, i); - } else { - for (i = lastvalue; i <= value; i++) - if (isUPPER(i)) - ANYOF_BITMAP_SET(ret, i); + if ((isLOWER(lastvalue) && isLOWER(value)) || + (isUPPER(lastvalue) && isUPPER(value))) + { + IV i; + if (isLOWER(lastvalue)) { + for (i = lastvalue; i <= value; i++) + if (isLOWER(i)) + ANYOF_BITMAP_SET(ret, i); + } else { + for (i = lastvalue; i <= value; i++) + if (isUPPER(i)) + ANYOF_BITMAP_SET(ret, i); + } } - } - else + else #endif - for ( ; lastvalue <= value; lastvalue++) - ANYOF_BITMAP_SET(ret, lastvalue); + for ( ; lastvalue <= value; lastvalue++) + ANYOF_BITMAP_SET(ret, lastvalue); + } else { + ANYOF_FLAGS(ret) |= ANYOF_UNICODE; + if (lastvalue < value) + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", + (UV)lastvalue, (UV)value); + else + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", + (UV)value); + } } - range = 0; + + range = 0; /* this range (if it was one) is done now */ } + if (need_class) { if (SIZE_ONLY) RExC_size += ANYOF_CLASS_ADD_SKIP; else RExC_emit += ANYOF_CLASS_ADD_SKIP; } + /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ if (!SIZE_ONLY && - (ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) { + (ANYOF_FLAGS(ret) & + /* If the only flag is folding (plus possibly inversion). */ + (ANYOF_FLAGS_ALL ^ ANYOF_INVERT) == ANYOF_FOLD)) { for (value = 0; value < 256; ++value) { if (ANYOF_BITMAP_TEST(ret, value)) { - I32 cf = PL_fold[value]; - ANYOF_BITMAP_SET(ret, cf); + IV fold = PL_fold[value]; + + if (fold != value) + ANYOF_BITMAP_SET(ret, fold); } } ANYOF_FLAGS(ret) &= ~ANYOF_FOLD; } + /* optimize inverted simple patterns (e.g. [^a-z]) */ - if (!SIZE_ONLY && (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) { + if (!SIZE_ONLY && + /* If the only flag is inversion. */ + (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) { for (value = 0; value < ANYOF_BITMAP_SIZE; ++value) ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL; ANYOF_FLAGS(ret) = 0; } - return ret; -} - -STATIC regnode * -S_regclassutf8(pTHX_ RExC_state_t *pRExC_state) -{ - register char *e; - register U32 value; - register U32 lastvalue = OOB_UTF8; - register I32 range = 0; - register regnode *ret; - STRLEN numlen; - I32 n; - SV *listsv; - U8 flags = 0; - I32 namedclass; - char *rangebegin; - - if (*RExC_parse == '^') { /* Complement of range. */ - RExC_naughty++; - RExC_parse++; - if (!SIZE_ONLY) - flags |= ANYOF_INVERT; - } - if (!SIZE_ONLY) { - if (FOLD) - flags |= ANYOF_FOLD; - if (LOC) - flags |= ANYOF_LOCALE; - listsv = newSVpvn("# comment\n", 10); - } - if (!SIZE_ONLY && ckWARN(WARN_REGEXP)) - checkposixcc(pRExC_state); - - if (*RExC_parse == ']' || *RExC_parse == '-') - goto skipcond; /* allow 1st char to be ] or - */ - - while (RExC_parse < RExC_end && *RExC_parse != ']') { - skipcond: - namedclass = OOB_NAMEDCLASS; - if (!range) - rangebegin = RExC_parse; - value = utf8_to_uv((U8*)RExC_parse, - RExC_end - RExC_parse, - &numlen, 0); - RExC_parse += numlen; - if (value == '[') - namedclass = regpposixcc(pRExC_state, value); - else if (value == '\\') { - value = (U32)utf8_to_uv((U8*)RExC_parse, - RExC_end - RExC_parse, - &numlen, 0); - RExC_parse += numlen; - /* Some compilers cannot handle switching on 64-bit integer - * values, therefore value cannot be an UV. Yes, this will - * be a problem later if we want switch on Unicode. --jhi */ - switch (value) { - case 'w': namedclass = ANYOF_ALNUM; break; - case 'W': namedclass = ANYOF_NALNUM; break; - case 's': namedclass = ANYOF_SPACE; break; - case 'S': namedclass = ANYOF_NSPACE; break; - case 'd': namedclass = ANYOF_DIGIT; break; - case 'D': namedclass = ANYOF_NDIGIT; break; - case 'p': - case 'P': - if (*RExC_parse == '{') { - e = strchr(RExC_parse++, '}'); - if (!e) - vFAIL("Missing right brace on \\p{}"); - n = e - RExC_parse; - } - else { - e = RExC_parse; - n = 1; - } - if (!SIZE_ONLY) { - if (value == 'p') - Perl_sv_catpvf(aTHX_ listsv, - "+utf8::%.*s\n", (int)n, RExC_parse); - else - Perl_sv_catpvf(aTHX_ listsv, - "!utf8::%.*s\n", (int)n, RExC_parse); - } - RExC_parse = e + 1; - lastvalue = OOB_UTF8; - continue; - case 'n': value = '\n'; break; - case 'r': value = '\r'; break; - case 't': value = '\t'; break; - case 'f': value = '\f'; break; - case 'b': value = '\b'; break; -#ifdef ASCIIish - case 'e': value = '\033'; break; - case 'a': value = '\007'; break; -#else - case 'e': value = '\047'; break; - case 'a': value = '\057'; break; -#endif - case 'x': - if (*RExC_parse == '{') { - e = strchr(RExC_parse++, '}'); - if (!e) - vFAIL("Missing right brace on \\x{}"); - numlen = 1; /* allow underscores */ - value = (UV)scan_hex(RExC_parse, - e - RExC_parse, - &numlen); - RExC_parse = e + 1; - } - else { - numlen = 0; /* disallow underscores */ - value = (UV)scan_hex(RExC_parse, 2, &numlen); - RExC_parse += numlen; - } - break; - case 'c': - value = UCHARAT(RExC_parse++); - value = toCTRL(value); - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - numlen = 0; /* disallow underscores */ - value = (UV)scan_oct(--RExC_parse, 3, &numlen); - RExC_parse += numlen; - break; - default: - if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value)) - vWARN2(RExC_parse, - "Unrecognized escape \\%c in character class passed through", - (int)value); - break; - } - } - if (namedclass > OOB_NAMEDCLASS) { - if (range) { /* a-\d, a-[:digit:] */ - if (!SIZE_ONLY) { - if (ckWARN(WARN_REGEXP)) - vWARN4(RExC_parse, - "False [] range \"%*.*s\"", - RExC_parse - rangebegin, - RExC_parse - rangebegin, - rangebegin); - Perl_sv_catpvf(aTHX_ listsv, - /* 0x002D is Unicode for '-' */ - "%04"UVxf"\n002D\n", (UV)lastvalue); - } - range = 0; - } - if (!SIZE_ONLY) { - switch (namedclass) { - case ANYOF_ALNUM: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; - case ANYOF_NALNUM: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break; - case ANYOF_ALNUMC: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break; - case ANYOF_NALNUMC: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break; - case ANYOF_ALPHA: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break; - case ANYOF_NALPHA: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break; - case ANYOF_ASCII: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break; - case ANYOF_NASCII: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break; - case ANYOF_CNTRL: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break; - case ANYOF_NCNTRL: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break; - case ANYOF_GRAPH: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break; - case ANYOF_NGRAPH: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break; - case ANYOF_DIGIT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break; - case ANYOF_NDIGIT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break; - case ANYOF_LOWER: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break; - case ANYOF_NLOWER: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break; - case ANYOF_PRINT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break; - case ANYOF_NPRINT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break; - case ANYOF_PUNCT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break; - case ANYOF_NPUNCT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break; - case ANYOF_SPACE: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");break; - case ANYOF_NSPACE: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");break; - case ANYOF_BLANK: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n"); break; - case ANYOF_NBLANK: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n"); break; - case ANYOF_PSXSPC: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; - case ANYOF_NPSXSPC: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; - case ANYOF_UPPER: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; - case ANYOF_NUPPER: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break; - case ANYOF_XDIGIT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break; - case ANYOF_NXDIGIT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break; - } - continue; - } - } - if (range) { - if (lastvalue > value) { /* b-a */ - Simple_vFAIL4("Invalid [] range \"%*.*s\"", - RExC_parse - rangebegin, - RExC_parse - rangebegin, - rangebegin); - } - range = 0; - } - else { - lastvalue = value; - if (*RExC_parse == '-' && RExC_parse+1 < RExC_end && - RExC_parse[1] != ']') { - RExC_parse++; - if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ - if (ckWARN(WARN_REGEXP)) - vWARN4(RExC_parse, - "False [] range \"%*.*s\"", - RExC_parse - rangebegin, - RExC_parse - rangebegin, - rangebegin); - if (!SIZE_ONLY) - Perl_sv_catpvf(aTHX_ listsv, - /* 0x002D is Unicode for '-' */ - "002D\n"); - } else - range = 1; - continue; /* do it next time */ - } - } - /* now is the next time */ - if (!SIZE_ONLY) - Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", - (UV)lastvalue, (UV)value); - range = 0; - } - - ret = reganode(pRExC_state, ANYOFUTF8, 0); - - if (!SIZE_ONLY) { - SV *rv = swash_init("utf8", "", listsv, 1, 0); -#ifdef DEBUGGING + if (!SIZE_ONLY) { AV *av = newAV(); - av_push(av, rv); - av_push(av, listsv); - rv = newRV_inc((SV*)av); -#else - SvREFCNT_dec(listsv); -#endif + SV *rv; + + av_store(av, 0, listsv); + av_store(av, 1, NULL); + rv = newRV_noinc((SV*)av); n = add_data(pRExC_state, 1, "s"); RExC_rx->data->data[n] = (void*)rv; - ARG1_SET(ret, flags); - ARG2_SET(ret, n); + ARG_SET(ret, n); } return ret; @@ -4269,7 +4110,7 @@ Perl_regdump(pTHX_ regexp *r) STATIC void S_put_byte(pTHX_ SV *sv, int c) { - if (isCNTRL(c) || c == 127 || c == 255) + if (isCNTRL(c) || c == 127 || c == 255 || !isPRINT(c)) Perl_sv_catpvf(aTHX_ sv, "\\%o", c); else if (c == '-' || c == ']' || c == '\\' || c == '^') Perl_sv_catpvf(aTHX_ sv, "\\%c", c); @@ -4311,8 +4152,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */ else if (k == ANYOF) { int i, rangestart = -1; - bool anyofutf8 = OP(o) == ANYOFUTF8; - U8 flags = anyofutf8 ? ARG1(o) : o->flags; + U8 flags = ANYOF_FLAGS(o); const char * const anyofs[] = { /* Should be syncronized with * ANYOF_ #xdefines in regcomp.h */ "\\w", @@ -4354,78 +4194,93 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]); if (flags & ANYOF_INVERT) sv_catpv(sv, "^"); - if (OP(o) == ANYOF) { - for (i = 0; i <= 256; i++) { - if (i < 256 && ANYOF_BITMAP_TEST(o,i)) { - if (rangestart == -1) - rangestart = i; - } else if (rangestart != -1) { - if (i <= rangestart + 3) - for (; rangestart < i; rangestart++) - put_byte(sv, rangestart); - else { + for (i = 0; i <= 256; i++) { + if (i < 256 && ANYOF_BITMAP_TEST(o,i)) { + if (rangestart == -1) + rangestart = i; + } else if (rangestart != -1) { + if (i <= rangestart + 3) + for (; rangestart < i; rangestart++) put_byte(sv, rangestart); - sv_catpv(sv, "-"); - put_byte(sv, i - 1); - } - rangestart = -1; + else { + put_byte(sv, rangestart); + sv_catpv(sv, "-"); + put_byte(sv, i - 1); } + rangestart = -1; } - if (o->flags & ANYOF_CLASS) - for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++) - if (ANYOF_CLASS_TEST(o,i)) - sv_catpv(sv, anyofs[i]); } - else { - SV *rv = (SV*)PL_regdata->data[ARG2(o)]; - AV *av = (AV*)SvRV((SV*)rv); - SV *sw = *av_fetch(av, 0, FALSE); - SV *lv = *av_fetch(av, 1, FALSE); - UV i; - U8 s[UTF8_MAXLEN+1]; - for (i = 0; i <= 256; i++) { /* just the first 256 */ - U8 *e = uv_to_utf8(s, i); - if (i < 256 && swash_fetch(sw, s)) { - if (rangestart == -1) - rangestart = i; - } else if (rangestart != -1) { - U8 *p; - - if (i <= rangestart + 3) - for (; rangestart < i; rangestart++) { - for(e = uv_to_utf8(s, rangestart), p = s; p < e; p++) - put_byte(sv, *p); + + if (o->flags & ANYOF_CLASS) + for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++) + if (ANYOF_CLASS_TEST(o,i)) + sv_catpv(sv, anyofs[i]); + + if (flags & ANYOF_UNICODE) + sv_catpv(sv, "{unicode}"); + + { + SV *lv; + SV *sw = regclass_swash(o, FALSE, &lv); + + if (lv) { + if (sw) { + UV i; + U8 s[UTF8_MAXLEN+1]; + + for (i = 0; i <= 256; i++) { /* just the first 256 */ + U8 *e = uv_to_utf8(s, i); + + if (i < 256 && swash_fetch(sw, s)) { + if (rangestart == -1) + rangestart = i; + } else if (rangestart != -1) { + U8 *p; + + if (i <= rangestart + 3) + for (; rangestart < i; rangestart++) { + for(e = uv_to_utf8(s, rangestart), p = s; p < e; p++) + put_byte(sv, *p); + } + else { + for (e = uv_to_utf8(s, rangestart), p = s; p < e; p++) + put_byte(sv, *p); + sv_catpv(sv, "-"); + for (e = uv_to_utf8(s, i - 1), p = s; p < e; p++) + put_byte(sv, *p); + } + rangestart = -1; + } } - else { - for (e = uv_to_utf8(s, rangestart), p = s; p < e; p++) - put_byte(sv, *p); - sv_catpv(sv, "-"); - for (e = uv_to_utf8(s, i - 1), p = s; p < e; p++) - put_byte(sv, *p); - } - rangestart = -1; + + sv_catpv(sv, "..."); /* et cetera */ } - } - sv_catpv(sv, "..."); - { - char *s = savepv(SvPVX(lv)); - - while(*s && *s != '\n') s++; - if (*s == '\n') { - char *t = ++s; - while (*s) { - if (*s == '\n') - *s = ' '; - s++; + { + char *s = savepv(SvPVX(lv)); + char *origs = s; + + while(*s && *s != '\n') s++; + + if (*s == '\n') { + char *t = ++s; + + while (*s) { + if (*s == '\n') + *s = ' '; + s++; + } + if (s[-1] == ' ') + s[-1] = 0; + + sv_catpv(sv, t); } - if (s[-1] == ' ') - s[-1] = 0; - - sv_catpv(sv, t); + + Safefree(origs); } } } + Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]); } else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) @@ -4486,16 +4341,6 @@ Perl_pregfree(pTHX_ struct regexp *r) while (--n >= 0) { switch (r->data->what[n]) { case 's': -#ifdef DEBUGGING - { - SV *rv = (SV*)r->data->data[n]; - AV *av = (AV*)SvRV((SV*)rv); - SV *sw = *av_fetch(av, 0, FALSE); - SV *lv = *av_fetch(av, 1, FALSE); - SvREFCNT_dec(sw); - SvREFCNT_dec(lv); - } -#endif SvREFCNT_dec((SV*)r->data->data[n]); break; case 'f': @@ -4657,4 +4502,3 @@ clear_re(pTHXo_ void *r) { ReREFCNT_dec((regexp *)r); } - diff --git a/regcomp.h b/regcomp.h index 284cf2f..c8094e1 100644 --- a/regcomp.h +++ b/regcomp.h @@ -88,12 +88,13 @@ struct regnode_2 { }; #define ANYOF_BITMAP_SIZE 32 /* 256 b/(8 b/B) */ -#define ANYOF_CLASSBITMAP_SIZE 4 +#define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */ struct regnode_charclass { U8 flags; U8 type; U16 next_off; + U32 arg1; char bitmap[ANYOF_BITMAP_SIZE]; }; @@ -101,6 +102,7 @@ struct regnode_charclass_class { U8 flags; U8 type; U16 next_off; + U32 arg1; char bitmap[ANYOF_BITMAP_SIZE]; char classflags[ANYOF_CLASSBITMAP_SIZE]; }; @@ -180,13 +182,21 @@ struct regnode_charclass_class { /* Flags for node->flags of ANYOF */ -#define ANYOF_CLASS 0x08 -#define ANYOF_INVERT 0x04 -#define ANYOF_FOLD 0x02 -#define ANYOF_LOCALE 0x01 +#define ANYOF_CLASS 0x08 +#define ANYOF_INVERT 0x04 +#define ANYOF_FOLD 0x02 +#define ANYOF_LOCALE 0x01 /* Used for regstclass only */ -#define ANYOF_EOS 0x10 /* Can match an empty string too */ +#define ANYOF_EOS 0x10 /* Can match an empty string too */ + +/* There is a character or a range past 0xff */ +#define ANYOF_UNICODE 0x20 + +/* Are there any runtime flags on in this node? */ +#define ANYOF_RUNTIME(s) (ANYOF_FLAGS(s) & 0x0f) + +#define ANYOF_FLAGS_ALL 0xff /* Character classes for node->classflags of ANYOF */ /* Should be synchronized with a table in regprop() */ @@ -220,7 +230,7 @@ struct regnode_charclass_class { #define ANYOF_NXDIGIT 25 #define ANYOF_PSXSPC 26 /* POSIX space: \s plus the vertical tab */ #define ANYOF_NPSXSPC 27 -#define ANYOF_BLANK 28 /* GNU extension: space and tab */ +#define ANYOF_BLANK 28 /* GNU extension: space and tab: non-vertical space */ #define ANYOF_NBLANK 29 #define ANYOF_MAX 32 @@ -238,7 +248,6 @@ struct regnode_charclass_class { #define ANYOF_CLASS_SIZE (sizeof(struct regnode_charclass_class)) #define ANYOF_FLAGS(p) ((p)->flags) -#define ANYOF_FLAGS_ALL 0xff #define ANYOF_BIT(c) (1 << ((c) & 7)) @@ -300,12 +309,14 @@ EXTCONST U8 PL_varies[] = { EXTCONST U8 PL_simple[]; #else EXTCONST U8 PL_simple[] = { - REG_ANY, ANYUTF8, SANY, SANYUTF8, ANYOF, ANYOFUTF8, - ALNUM, ALNUMUTF8, ALNUML, ALNUMLUTF8, - NALNUM, NALNUMUTF8, NALNUML, NALNUMLUTF8, - SPACE, SPACEUTF8, SPACEL, SPACELUTF8, - NSPACE, NSPACEUTF8, NSPACEL, NSPACELUTF8, - DIGIT, DIGITUTF8, NDIGIT, NDIGITUTF8, 0 + REG_ANY, SANY, + ANYOF, + ALNUM, ALNUML, + NALNUM, NALNUML, + SPACE, SPACEL, + NSPACE, NSPACEL, + DIGIT, NDIGIT, + 0 }; #endif diff --git a/regcomp.sym b/regcomp.sym index bb5f8f8..59284f4 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -16,46 +16,27 @@ EOL EOL, no Match "" at end of line. MEOL EOL, no Same, assuming multiline. SEOL EOL, no Same, assuming singleline. BOUND BOUND, no Match "" at any word boundary -BOUNDUTF8 BOUND, no Match "" at any word boundary BOUNDL BOUND, no Match "" at any word boundary -BOUNDLUTF8 BOUND, no Match "" at any word boundary NBOUND NBOUND, no Match "" at any word non-boundary -NBOUNDUTF8 NBOUND, no Match "" at any word non-boundary NBOUNDL NBOUND, no Match "" at any word non-boundary -NBOUNDLUTF8 NBOUND, no Match "" at any word non-boundary GPOS GPOS, no Matches where last m//g left off. # [Special] alternatives REG_ANY REG_ANY, no Match any one character (except newline). -ANYUTF8 REG_ANY, no Match any one Unicode character (except newline). SANY REG_ANY, no Match any one character. -SANYUTF8 REG_ANY, no Match any one Unicode character. ANYOF ANYOF, sv Match character in (or not in) this class. -ANYOFUTF8 ANYOF, sv 1 Match character in (or not in) this class. ALNUM ALNUM, no Match any alphanumeric character -ALNUMUTF8 ALNUM, no Match any alphanumeric character in utf8 ALNUML ALNUM, no Match any alphanumeric char in locale -ALNUMLUTF8 ALNUM, no Match any alphanumeric char in locale+utf8 NALNUM NALNUM, no Match any non-alphanumeric character -NALNUMUTF8 NALNUM, no Match any non-alphanumeric character in utf8 NALNUML NALNUM, no Match any non-alphanumeric char in locale -NALNUMLUTF8 NALNUM, no Match any non-alphanumeric char in locale+utf8 SPACE SPACE, no Match any whitespace character -SPACEUTF8 SPACE, no Match any whitespace character in utf8 SPACEL SPACE, no Match any whitespace char in locale -SPACELUTF8 SPACE, no Match any whitespace char in locale+utf8 NSPACE NSPACE, no Match any non-whitespace character -NSPACEUTF8 NSPACE, no Match any non-whitespace character in utf8 NSPACEL NSPACE, no Match any non-whitespace char in locale -NSPACELUTF8 NSPACE, no Match any non-whitespace char in locale+utf8 DIGIT DIGIT, no Match any numeric character -DIGITUTF8 DIGIT, no Match any numeric character in utf8 DIGITL DIGIT, no Match any numeric character in locale -DIGITLUTF8 DIGIT, no Match any numeric character in locale+utf8 NDIGIT NDIGIT, no Match any non-numeric character -NDIGITUTF8 NDIGIT, no Match any non-numeric character in utf8 NDIGITL NDIGIT, no Match any non-numeric character in locale -NDIGITLUTF8 NDIGIT, no Match any non-numeric character in locale+utf8 CLUMP CLUMP, no Match any combining character sequence # BRANCH The set of branches constituting a single choice are hooked diff --git a/regexec.c b/regexec.c index 5e821ba..bdbdb59 100644 --- a/regexec.c +++ b/regexec.c @@ -39,6 +39,7 @@ /* *These* symbols are masked to allow static link. */ # define Perl_pregexec my_pregexec # define Perl_reginitcolors my_reginitcolors +# define Perl_regclass_swash my_regclass_swash # define PERL_NO_GET_CONTEXT #endif @@ -105,13 +106,6 @@ * Forwards. */ -#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c)) -#ifdef DEBUGGING -# define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch(*av_fetch((AV*)SvRV((SV*)PL_regdata->data[ARG2(f)]),0,FALSE),p)) -#else -# define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p)) -#endif - #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv)) #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b) @@ -738,7 +732,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, t = s; if (prog->reganch & ROPT_UTF8) { - PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */ + PL_regdata = prog->data; PL_bostr = startpos; } s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1); @@ -840,25 +834,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta unsigned int c2; char *e; register I32 tmp = 1; /* Scratch variable? */ + register bool do_utf8 = DO_UTF8(PL_reg_sv); /* We know what class it must start with. */ switch (OP(c)) { - case ANYOFUTF8: - while (s < strend) { - if (REGINCLASSUTF8(c, (U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; - else - tmp = doevery; - } - else - tmp = 1; - s += UTF8SKIP(s); - } - break; case ANYOF: while (s < strend) { - if (REGINCLASS(c, *(U8*)s)) { + if (reginclass(c, (U8*)s, do_utf8)) { if (tmp && (norun || regtry(prog, s))) goto got_it; else @@ -866,7 +848,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta } else tmp = 1; - s++; + s += do_utf8 ? UTF8SKIP(s) : 1; } break; case EXACTF: @@ -912,42 +894,40 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUND: - tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; - tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); - while (s < strend) { - if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) { - tmp = !tmp; - if ((norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + if (s == startpos) + tmp = '\n'; + else { + U8 *r = reghop((U8*)s, -1); + + tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0); + } + tmp = ((OP(c) == BOUND ? + isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); + while (s < strend) { + if (tmp == !(OP(c) == BOUND ? + swash_fetch(PL_utf8_alnum, (U8*)s) : + isALNUM_LC_utf8((U8*)s))) + { + tmp = !tmp; + if ((norun || regtry(prog, s))) + goto got_it; + } + s += UTF8SKIP(s); } - s++; } - if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) - goto got_it; - break; - case BOUNDLUTF8: - PL_reg_flags |= RF_tainted; - /* FALL THROUGH */ - case BOUNDUTF8: - if (s == startpos) - tmp = '\n'; else { - U8 *r = reghop((U8*)s, -1); - - tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0); - } - tmp = ((OP(c) == BOUNDUTF8 ? - isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); - while (s < strend) { - if (tmp == !(OP(c) == BOUNDUTF8 ? - swash_fetch(PL_utf8_alnum, (U8*)s) : - isALNUM_LC_utf8((U8*)s))) - { - tmp = !tmp; - if ((norun || regtry(prog, s))) - goto got_it; + tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; + tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); + while (s < strend) { + if (tmp == + !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) { + tmp = !tmp; + if ((norun || regtry(prog, s))) + goto got_it; + } + s++; } - s += UTF8SKIP(s); } if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) goto got_it; @@ -956,365 +936,382 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUND: - tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; - tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); - while (s < strend) { - if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s))) - tmp = !tmp; - else if ((norun || regtry(prog, s))) - goto got_it; - s++; + if (do_utf8) { + if (s == startpos) + tmp = '\n'; + else { + U8 *r = reghop((U8*)s, -1); + + tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0); + } + tmp = ((OP(c) == NBOUND ? + isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); + while (s < strend) { + if (tmp == !(OP(c) == NBOUND ? + swash_fetch(PL_utf8_alnum, (U8*)s) : + isALNUM_LC_utf8((U8*)s))) + tmp = !tmp; + else if ((norun || regtry(prog, s))) + goto got_it; + s += UTF8SKIP(s); + } } - if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) - goto got_it; - break; - case NBOUNDLUTF8: - PL_reg_flags |= RF_tainted; - /* FALL THROUGH */ - case NBOUNDUTF8: - if (s == startpos) - tmp = '\n'; else { - U8 *r = reghop((U8*)s, -1); - - tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0); - } - tmp = ((OP(c) == NBOUNDUTF8 ? - isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); - while (s < strend) { - if (tmp == !(OP(c) == NBOUNDUTF8 ? - swash_fetch(PL_utf8_alnum, (U8*)s) : - isALNUM_LC_utf8((U8*)s))) - tmp = !tmp; - else if ((norun || regtry(prog, s))) - goto got_it; - s += UTF8SKIP(s); + tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; + tmp = ((OP(c) == NBOUND ? + isALNUM(tmp) : isALNUM_LC(tmp)) != 0); + while (s < strend) { + if (tmp == + !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s))) + tmp = !tmp; + else if ((norun || regtry(prog, s))) + goto got_it; + s++; + } } if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) goto got_it; break; case ALNUM: - while (s < strend) { - if (isALNUM(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (swash_fetch(PL_utf8_alnum, (U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case ALNUMUTF8: - while (s < strend) { - if (swash_fetch(PL_utf8_alnum, (U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (isALNUM(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case ALNUML: PL_reg_flags |= RF_tainted; - while (s < strend) { - if (isALNUM_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (isALNUM_LC_utf8((U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case ALNUMLUTF8: - PL_reg_flags |= RF_tainted; - while (s < strend) { - if (isALNUM_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (isALNUM_LC(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case NALNUM: - while (s < strend) { - if (!isALNUM(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (!swash_fetch(PL_utf8_alnum, (U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case NALNUMUTF8: - while (s < strend) { - if (!swash_fetch(PL_utf8_alnum, (U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (!isALNUM(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case NALNUML: PL_reg_flags |= RF_tainted; - while (s < strend) { - if (!isALNUM_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (!isALNUM_LC_utf8((U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case NALNUMLUTF8: - PL_reg_flags |= RF_tainted; - while (s < strend) { - if (!isALNUM_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (!isALNUM_LC(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case SPACE: - while (s < strend) { - if (isSPACE(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case SPACEUTF8: - while (s < strend) { - if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (isSPACE(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case SPACEL: PL_reg_flags |= RF_tainted; - while (s < strend) { - if (isSPACE_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case SPACELUTF8: - PL_reg_flags |= RF_tainted; - while (s < strend) { - if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (isSPACE_LC(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case NSPACE: - while (s < strend) { - if (!isSPACE(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case NSPACEUTF8: - while (s < strend) { - if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (!isSPACE(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case NSPACEL: PL_reg_flags |= RF_tainted; - while (s < strend) { - if (!isSPACE_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case NSPACELUTF8: - PL_reg_flags |= RF_tainted; - while (s < strend) { - if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (!isSPACE_LC(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case DIGIT: - while (s < strend) { - if (isDIGIT(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (swash_fetch(PL_utf8_digit,(U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case DIGITUTF8: - while (s < strend) { - if (swash_fetch(PL_utf8_digit,(U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (isDIGIT(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case DIGITL: PL_reg_flags |= RF_tainted; - while (s < strend) { - if (isDIGIT_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (isDIGIT_LC_utf8((U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case DIGITLUTF8: - PL_reg_flags |= RF_tainted; - while (s < strend) { - if (isDIGIT_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (isDIGIT_LC(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case NDIGIT: - while (s < strend) { - if (!isDIGIT(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (!swash_fetch(PL_utf8_digit,(U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case NDIGITUTF8: - while (s < strend) { - if (!swash_fetch(PL_utf8_digit,(U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (!isDIGIT(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; case NDIGITL: PL_reg_flags |= RF_tainted; - while (s < strend) { - if (!isDIGIT_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + if (do_utf8) { + while (s < strend) { + if (!isDIGIT_LC_utf8((U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s += UTF8SKIP(s); } - else - tmp = 1; - s++; } - break; - case NDIGITLUTF8: - PL_reg_flags |= RF_tainted; - while (s < strend) { - if (!isDIGIT_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; + else { + while (s < strend) { + if (!isDIGIT_LC(*s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } else - tmp = doevery; + tmp = 1; + s++; } - else - tmp = 1; - s += UTF8SKIP(s); } break; default: @@ -1606,6 +1603,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT) /* don't bother with what can't match */ strend = HOPc(strend, -(minlen - 1)); + DEBUG_r({ + SV *prop = sv_newmortal(); + regprop(prop, c); + PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s); + }); if (find_byclass(prog, c, s, strend, startpos, 0)) goto got_it; DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n")); @@ -1619,7 +1621,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * last = screaminstr(sv, prog->float_substr, s - strbeg, end_shift, &scream_pos, 1); /* last one */ if (!last) - last = scream_olds; /* Only one occurence. */ + last = scream_olds; /* Only one occurrence. */ } else { STRLEN len; @@ -1891,6 +1893,7 @@ S_regmatch(pTHX_ regnode *prog) int minmod = 0, sw = 0, logical = 0; I32 unwind = 0; I32 firstcp = PL_savestack_ix; + register bool do_utf8 = DO_UTF8(PL_reg_sv); #ifdef DEBUGGING PL_regindent++; @@ -2009,8 +2012,8 @@ S_regmatch(pTHX_ regnode *prog) if (PL_regeol != locinput) sayNO; break; - case SANYUTF8: - if (nextchr & 0x80) { + case SANY: + if (DO_UTF8(PL_reg_sv)) { locinput += PL_utf8skip[nextchr]; if (locinput > PL_regeol) sayNO; @@ -2021,13 +2024,8 @@ S_regmatch(pTHX_ regnode *prog) sayNO; nextchr = UCHARAT(++locinput); break; - case SANY: - if (!nextchr && locinput >= PL_regeol) - sayNO; - nextchr = UCHARAT(++locinput); - break; - case ANYUTF8: - if (nextchr & 0x80) { + case REG_ANY: + if (DO_UTF8(PL_reg_sv)) { locinput += PL_utf8skip[nextchr]; if (locinput > PL_regeol) sayNO; @@ -2038,11 +2036,6 @@ S_regmatch(pTHX_ regnode *prog) sayNO; nextchr = UCHARAT(++locinput); break; - case REG_ANY: - if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n') - sayNO; - nextchr = UCHARAT(++locinput); - break; case EXACT: s = STRING(scan); ln = STR_LEN(scan); @@ -2099,22 +2092,24 @@ S_regmatch(pTHX_ regnode *prog) locinput += ln; nextchr = UCHARAT(locinput); break; - case ANYOFUTF8: - if (!REGINCLASSUTF8(scan, (U8*)locinput)) - sayNO; - if (locinput >= PL_regeol) - sayNO; - locinput += PL_utf8skip[nextchr]; - nextchr = UCHARAT(locinput); - break; case ANYOF: - if (nextchr < 0) + if (do_utf8) { + if (!reginclass(scan, (U8*)locinput, do_utf8)) + sayNO; + if (locinput >= PL_regeol) + sayNO; + locinput += PL_utf8skip[nextchr]; nextchr = UCHARAT(locinput); - if (!REGINCLASS(scan, nextchr)) - sayNO; - if (!nextchr && locinput >= PL_regeol) - sayNO; - nextchr = UCHARAT(++locinput); + } + else { + if (nextchr < 0) + nextchr = UCHARAT(locinput); + if (!reginclass(scan, (U8*)locinput, do_utf8)) + sayNO; + if (!nextchr && locinput >= PL_regeol) + sayNO; + nextchr = UCHARAT(++locinput); + } break; case ALNUML: PL_reg_flags |= RF_tainted; @@ -2122,19 +2117,8 @@ S_regmatch(pTHX_ regnode *prog) case ALNUM: if (!nextchr) sayNO; - if (!(OP(scan) == ALNUM - ? isALNUM(nextchr) : isALNUM_LC(nextchr))) - sayNO; - nextchr = UCHARAT(++locinput); - break; - case ALNUMLUTF8: - PL_reg_flags |= RF_tainted; - /* FALL THROUGH */ - case ALNUMUTF8: - if (!nextchr) - sayNO; - if (nextchr & 0x80) { - if (!(OP(scan) == ALNUMUTF8 + if (do_utf8) { + if (!(OP(scan) == ALNUM ? swash_fetch(PL_utf8_alnum, (U8*)locinput) : isALNUM_LC_utf8((U8*)locinput))) { @@ -2144,7 +2128,7 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(locinput); break; } - if (!(OP(scan) == ALNUMUTF8 + if (!(OP(scan) == ALNUM ? isALNUM(nextchr) : isALNUM_LC(nextchr))) sayNO; nextchr = UCHARAT(++locinput); @@ -2155,19 +2139,8 @@ S_regmatch(pTHX_ regnode *prog) case NALNUM: if (!nextchr && locinput >= PL_regeol) sayNO; - if (OP(scan) == NALNUM - ? isALNUM(nextchr) : isALNUM_LC(nextchr)) - sayNO; - nextchr = UCHARAT(++locinput); - break; - case NALNUMLUTF8: - PL_reg_flags |= RF_tainted; - /* FALL THROUGH */ - case NALNUMUTF8: - if (!nextchr && locinput >= PL_regeol) - sayNO; - if (nextchr & 0x80) { - if (OP(scan) == NALNUMUTF8 + if (do_utf8) { + if (OP(scan) == NALNUM ? swash_fetch(PL_utf8_alnum, (U8*)locinput) : isALNUM_LC_utf8((U8*)locinput)) { @@ -2177,7 +2150,7 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(locinput); break; } - if (OP(scan) == NALNUMUTF8 + if (OP(scan) == NALNUM ? isALNUM(nextchr) : isALNUM_LC(nextchr)) sayNO; nextchr = UCHARAT(++locinput); @@ -2189,42 +2162,38 @@ S_regmatch(pTHX_ regnode *prog) case BOUND: case NBOUND: /* was last char in word? */ - ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev; - if (OP(scan) == BOUND || OP(scan) == NBOUND) { - ln = isALNUM(ln); - n = isALNUM(nextchr); - } - else { - ln = isALNUM_LC(ln); - n = isALNUM_LC(nextchr); - } - if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL)) - sayNO; - break; - case BOUNDLUTF8: - case NBOUNDLUTF8: - PL_reg_flags |= RF_tainted; - /* FALL THROUGH */ - case BOUNDUTF8: - case NBOUNDUTF8: - /* was last char in word? */ - if (locinput == PL_regbol) - ln = PL_regprev; - else { - U8 *r = reghop((U8*)locinput, -1); - - ln = utf8_to_uv(r, s - (char*)r, 0, 0); - } - if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) { - ln = isALNUM_uni(ln); - n = swash_fetch(PL_utf8_alnum, (U8*)locinput); + if (do_utf8) { + if (locinput == PL_regbol) + ln = PL_regprev; + else { + U8 *r = reghop((U8*)locinput, -1); + + ln = utf8_to_uv(r, s - (char*)r, 0, 0); + } + if (OP(scan) == BOUND || OP(scan) == NBOUND) { + ln = isALNUM_uni(ln); + n = swash_fetch(PL_utf8_alnum, (U8*)locinput); + } + else { + ln = isALNUM_LC_uni(ln); + n = isALNUM_LC_utf8((U8*)locinput); + } } else { - ln = isALNUM_LC_uni(ln); - n = isALNUM_LC_utf8((U8*)locinput); + ln = (locinput != PL_regbol) ? + UCHARAT(locinput - 1) : PL_regprev; + if (OP(scan) == BOUND || OP(scan) == NBOUND) { + ln = isALNUM(ln); + n = isALNUM(nextchr); + } + else { + ln = isALNUM_LC(ln); + n = isALNUM_LC(nextchr); + } } - if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8)) - sayNO; + if (((!ln) == (!n)) == (OP(scan) == BOUND || + OP(scan) == BOUNDL)) + sayNO; break; case SPACEL: PL_reg_flags |= RF_tainted; @@ -2232,32 +2201,29 @@ S_regmatch(pTHX_ regnode *prog) case SPACE: if (!nextchr) sayNO; - if (!(OP(scan) == SPACE - ? isSPACE(nextchr) : isSPACE_LC(nextchr))) - sayNO; - nextchr = UCHARAT(++locinput); - break; - case SPACELUTF8: - PL_reg_flags |= RF_tainted; - /* FALL THROUGH */ - case SPACEUTF8: - if (!nextchr) - sayNO; - if (nextchr & 0x80) { - if (!(OP(scan) == SPACEUTF8 - ? swash_fetch(PL_utf8_space, (U8*)locinput) - : isSPACE_LC_utf8((U8*)locinput))) - { - sayNO; + if (DO_UTF8(PL_reg_sv)) { + if (nextchr & 0x80) { + if (!(OP(scan) == SPACE + ? swash_fetch(PL_utf8_space, (U8*)locinput) + : isSPACE_LC_utf8((U8*)locinput))) + { + sayNO; + } + locinput += PL_utf8skip[nextchr]; + nextchr = UCHARAT(locinput); + break; } - locinput += PL_utf8skip[nextchr]; - nextchr = UCHARAT(locinput); - break; + if (!(OP(scan) == SPACE + ? isSPACE(nextchr) : isSPACE_LC(nextchr))) + sayNO; + nextchr = UCHARAT(++locinput); + } + else { + if (!(OP(scan) == SPACE + ? isSPACE(nextchr) : isSPACE_LC(nextchr))) + sayNO; + nextchr = UCHARAT(++locinput); } - if (!(OP(scan) == SPACEUTF8 - ? isSPACE(nextchr) : isSPACE_LC(nextchr))) - sayNO; - nextchr = UCHARAT(++locinput); break; case NSPACEL: PL_reg_flags |= RF_tainted; @@ -2265,19 +2231,8 @@ S_regmatch(pTHX_ regnode *prog) case NSPACE: if (!nextchr && locinput >= PL_regeol) sayNO; - if (OP(scan) == NSPACE - ? isSPACE(nextchr) : isSPACE_LC(nextchr)) - sayNO; - nextchr = UCHARAT(++locinput); - break; - case NSPACELUTF8: - PL_reg_flags |= RF_tainted; - /* FALL THROUGH */ - case NSPACEUTF8: - if (!nextchr && locinput >= PL_regeol) - sayNO; - if (nextchr & 0x80) { - if (OP(scan) == NSPACEUTF8 + if (DO_UTF8(PL_reg_sv)) { + if (OP(scan) == NSPACE ? swash_fetch(PL_utf8_space, (U8*)locinput) : isSPACE_LC_utf8((U8*)locinput)) { @@ -2287,7 +2242,7 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(locinput); break; } - if (OP(scan) == NSPACEUTF8 + if (OP(scan) == NSPACE ? isSPACE(nextchr) : isSPACE_LC(nextchr)) sayNO; nextchr = UCHARAT(++locinput); @@ -2298,19 +2253,8 @@ S_regmatch(pTHX_ regnode *prog) case DIGIT: if (!nextchr) sayNO; - if (!(OP(scan) == DIGIT - ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))) - sayNO; - nextchr = UCHARAT(++locinput); - break; - case DIGITLUTF8: - PL_reg_flags |= RF_tainted; - /* FALL THROUGH */ - case DIGITUTF8: - if (!nextchr) - sayNO; - if (nextchr & 0x80) { - if (!(OP(scan) == DIGITUTF8 + if (DO_UTF8(PL_reg_sv)) { + if (!(OP(scan) == DIGIT ? swash_fetch(PL_utf8_digit, (U8*)locinput) : isDIGIT_LC_utf8((U8*)locinput))) { @@ -2320,7 +2264,7 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(locinput); break; } - if (!(OP(scan) == DIGITUTF8 + if (!(OP(scan) == DIGIT ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))) sayNO; nextchr = UCHARAT(++locinput); @@ -2331,19 +2275,8 @@ S_regmatch(pTHX_ regnode *prog) case NDIGIT: if (!nextchr && locinput >= PL_regeol) sayNO; - if (OP(scan) == NDIGIT - ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)) - sayNO; - nextchr = UCHARAT(++locinput); - break; - case NDIGITLUTF8: - PL_reg_flags |= RF_tainted; - /* FALL THROUGH */ - case NDIGITUTF8: - if (!nextchr && locinput >= PL_regeol) - sayNO; - if (nextchr & 0x80) { - if (OP(scan) == NDIGITUTF8 + if (DO_UTF8(PL_reg_sv)) { + if (OP(scan) == NDIGIT ? swash_fetch(PL_utf8_digit, (U8*)locinput) : isDIGIT_LC_utf8((U8*)locinput)) { @@ -2353,7 +2286,7 @@ S_regmatch(pTHX_ regnode *prog) nextchr = UCHARAT(locinput); break; } - if (OP(scan) == NDIGITUTF8 + if (OP(scan) == NDIGIT ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)) sayNO; nextchr = UCHARAT(++locinput); @@ -3461,30 +3394,33 @@ S_regrepeat(pTHX_ regnode *p, I32 max) register I32 c; register char *loceol = PL_regeol; register I32 hardcount = 0; + register bool do_utf8 = DO_UTF8(PL_reg_sv); scan = PL_reginput; if (max != REG_INFTY && max < loceol - scan) loceol = scan + max; switch (OP(p)) { case REG_ANY: - while (scan < loceol && *scan != '\n') - scan++; - break; - case SANY: - scan = loceol; - break; - case ANYUTF8: - loceol = PL_regeol; - while (scan < loceol && *scan != '\n') { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && *scan != '\n') { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && *scan != '\n') + scan++; } break; - case SANYUTF8: - loceol = PL_regeol; - while (scan < loceol) { - scan += UTF8SKIP(scan); - hardcount++; + case SANY: + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + scan = loceol; } break; case EXACT: /* length of string is 1 */ @@ -3505,135 +3441,144 @@ S_regrepeat(pTHX_ regnode *p, I32 max) (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c])) scan++; break; - case ANYOFUTF8: - loceol = PL_regeol; - while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) { - scan += UTF8SKIP(scan); - hardcount++; - } - break; case ANYOF: - while (scan < loceol && REGINCLASS(p, *scan)) - scan++; + if (do_utf8) { + loceol = PL_regeol; + while (scan < loceol && reginclass(p, (U8*)scan, do_utf8)) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && reginclass(p, (U8*)scan, do_utf8)) + scan++; + } break; case ALNUM: - while (scan < loceol && isALNUM(*scan)) - scan++; - break; - case ALNUMUTF8: - loceol = PL_regeol; - while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && isALNUM(*scan)) + scan++; } break; case ALNUML: PL_reg_flags |= RF_tainted; - while (scan < loceol && isALNUM_LC(*scan)) - scan++; - break; - case ALNUMLUTF8: - PL_reg_flags |= RF_tainted; - loceol = PL_regeol; - while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && isALNUM_LC(*scan)) + scan++; } break; - break; case NALNUM: - while (scan < loceol && !isALNUM(*scan)) - scan++; - break; - case NALNUMUTF8: - loceol = PL_regeol; - while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && !isALNUM(*scan)) + scan++; } break; case NALNUML: PL_reg_flags |= RF_tainted; - while (scan < loceol && !isALNUM_LC(*scan)) - scan++; - break; - case NALNUMLUTF8: - PL_reg_flags |= RF_tainted; - loceol = PL_regeol; - while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && !isALNUM_LC(*scan)) + scan++; } break; case SPACE: - while (scan < loceol && isSPACE(*scan)) - scan++; - break; - case SPACEUTF8: - loceol = PL_regeol; - while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && + (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && isSPACE(*scan)) + scan++; } break; case SPACEL: PL_reg_flags |= RF_tainted; - while (scan < loceol && isSPACE_LC(*scan)) - scan++; - break; - case SPACELUTF8: - PL_reg_flags |= RF_tainted; - loceol = PL_regeol; - while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && + (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && isSPACE_LC(*scan)) + scan++; } break; case NSPACE: - while (scan < loceol && !isSPACE(*scan)) - scan++; - break; - case NSPACEUTF8: - loceol = PL_regeol; - while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && + !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && !isSPACE(*scan)) + scan++; + break; } - break; case NSPACEL: PL_reg_flags |= RF_tainted; - while (scan < loceol && !isSPACE_LC(*scan)) - scan++; - break; - case NSPACELUTF8: - PL_reg_flags |= RF_tainted; - loceol = PL_regeol; - while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && + !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && !isSPACE_LC(*scan)) + scan++; } break; case DIGIT: - while (scan < loceol && isDIGIT(*scan)) - scan++; - break; - case DIGITUTF8: - loceol = PL_regeol; - while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && isDIGIT(*scan)) + scan++; } break; - break; case NDIGIT: - while (scan < loceol && !isDIGIT(*scan)) - scan++; - break; - case NDIGITUTF8: - loceol = PL_regeol; - while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) { - scan += UTF8SKIP(scan); - hardcount++; + if (DO_UTF8(PL_reg_sv)) { + loceol = PL_regeol; + while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) { + scan += UTF8SKIP(scan); + hardcount++; + } + } else { + while (scan < loceol && !isDIGIT(*scan)) + scan++; } break; default: /* Called on something of 0 width. */ @@ -3712,102 +3657,139 @@ S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp) } /* +- regclass_swash - prepare the utf8 swash +*/ + +SV * +Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp) +{ + SV *sw = NULL; + SV *si = NULL; + + if (PL_regdata && PL_regdata->count) { + U32 n = ARG(node); + + if (PL_regdata->what[n] == 's') { + SV *rv = (SV*)PL_regdata->data[n]; + AV *av = (AV*)SvRV((SV*)rv); + SV **a; + + si = *av_fetch(av, 0, FALSE); + a = av_fetch(av, 1, FALSE); + + if (a) + sw = *a; + else if (si && doinit) { + sw = swash_init("utf8", "", si, 1, 0); + (void)av_store(av, 1, sw); + } + } + } + + if (initsvp) + *initsvp = si; + + return sw; +} + +/* - reginclass - determine if a character falls into a character class */ STATIC bool -S_reginclass(pTHX_ register regnode *p, register I32 c) +S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8) { - char flags = ANYOF_FLAGS(p); + char flags = ANYOF_FLAGS(n); bool match = FALSE; - c &= 0xFF; - if (ANYOF_BITMAP_TEST(p, c)) - match = TRUE; - else if (flags & ANYOF_FOLD) { - I32 cf; - if (flags & ANYOF_LOCALE) { - PL_reg_flags |= RF_tainted; - cf = PL_fold_locale[c]; + if (do_utf8 || (flags & ANYOF_UNICODE)) { + if (do_utf8 && !ANYOF_RUNTIME(n)) { + STRLEN len; + UV c = utf8_to_uv_simple(p, &len); + + if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c)) + match = TRUE; } - else - cf = PL_fold[c]; - if (ANYOF_BITMAP_TEST(p, cf)) - match = TRUE; - } - if (!match && (flags & ANYOF_CLASS)) { - PL_reg_flags |= RF_tainted; - if ( - (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c)) - ) /* How's that for a conditional? */ - { - match = TRUE; + if (!match) { + SV *sw = regclass_swash(n, TRUE, 0); + + if (sw) { + if (swash_fetch(sw, p)) + match = TRUE; + else if (flags & ANYOF_FOLD) { + U8 tmpbuf[UTF8_MAXLEN+1]; + + if (flags & ANYOF_LOCALE) { + PL_reg_flags |= RF_tainted; + uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p)); + } + else + uv_to_utf8(tmpbuf, toLOWER_utf8(p)); + if (swash_fetch(sw, tmpbuf)) + match = TRUE; + } + } } } + else { + U8 c = *p; - return (flags & ANYOF_INVERT) ? !match : match; -} - -STATIC bool -S_reginclassutf8(pTHX_ regnode *f, U8 *p) -{ - char flags = ARG1(f); - bool match = FALSE; -#ifdef DEBUGGING - SV *rv = (SV*)PL_regdata->data[ARG2(f)]; - AV *av = (AV*)SvRV((SV*)rv); - SV *sw = *av_fetch(av, 0, FALSE); - SV *lv = *av_fetch(av, 1, FALSE); -#else - SV *sw = (SV*)PL_regdata->data[ARG2(f)]; -#endif + if (ANYOF_BITMAP_TEST(n, c)) + match = TRUE; + else if (flags & ANYOF_FOLD) { + I32 f; - if (swash_fetch(sw, p)) - match = TRUE; - else if (flags & ANYOF_FOLD) { - U8 tmpbuf[UTF8_MAXLEN+1]; - if (flags & ANYOF_LOCALE) { + if (flags & ANYOF_LOCALE) { + PL_reg_flags |= RF_tainted; + f = PL_fold_locale[c]; + } + else + f = PL_fold[c]; + if (f != c && ANYOF_BITMAP_TEST(n, f)) + match = TRUE; + } + + if (!match && (flags & ANYOF_CLASS)) { PL_reg_flags |= RF_tainted; - uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p)); + if ( + (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) || + (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c)) + ) /* How's that for a conditional? */ + { + match = TRUE; + } } - else - uv_to_utf8(tmpbuf, toLOWER_utf8(p)); - if (swash_fetch(sw, tmpbuf)) - match = TRUE; } - /* UTF8 combined with ANYOF_CLASS is ill-defined. */ - return (flags & ANYOF_INVERT) ? !match : match; } @@ -3815,17 +3797,20 @@ STATIC U8 * S_reghop(pTHX_ U8 *s, I32 off) { if (off >= 0) { - while (off-- && s < (U8*)PL_regeol) + while (off-- && s < (U8*)PL_regeol) { + /* XXX could check well-formedness here */ s += UTF8SKIP(s); + } } else { while (off++) { if (s > (U8*)PL_bostr) { s--; - if (*s & 0x80) { - while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80) + if (UTF8_IS_CONTINUED(*s)) { + while (s > (U8*)PL_bostr && UTF8_IS_CONTINUATION(*s)) s--; - } /* XXX could check well-formedness here */ + } + /* XXX could check well-formedness here */ } } } @@ -3836,8 +3821,10 @@ STATIC U8 * S_reghopmaybe(pTHX_ U8* s, I32 off) { if (off >= 0) { - while (off-- && s < (U8*)PL_regeol) + while (off-- && s < (U8*)PL_regeol) { + /* XXX could check well-formedness here */ s += UTF8SKIP(s); + } if (off >= 0) return 0; } @@ -3845,10 +3832,11 @@ S_reghopmaybe(pTHX_ U8* s, I32 off) while (off++) { if (s > (U8*)PL_bostr) { s--; - if (*s & 0x80) { - while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80) + if (UTF8_IS_CONTINUED(*s)) { + while (s > (U8*)PL_bostr && UTF8_IS_CONTINUATION(*s)) s--; - } /* XXX could check well-formedness here */ + } + /* XXX could check well-formedness here */ } else break; diff --git a/regnodes.h b/regnodes.h index 89c78e6..00dc0ec 100644 --- a/regnodes.h +++ b/regnodes.h @@ -13,76 +13,57 @@ #define MEOL 7 /* 0x7 Same, assuming multiline. */ #define SEOL 8 /* 0x8 Same, assuming singleline. */ #define BOUND 9 /* 0x9 Match "" at any word boundary */ -#define BOUNDUTF8 10 /* 0xa Match "" at any word boundary */ -#define BOUNDL 11 /* 0xb Match "" at any word boundary */ -#define BOUNDLUTF8 12 /* 0xc Match "" at any word boundary */ -#define NBOUND 13 /* 0xd Match "" at any word non-boundary */ -#define NBOUNDUTF8 14 /* 0xe Match "" at any word non-boundary */ -#define NBOUNDL 15 /* 0xf Match "" at any word non-boundary */ -#define NBOUNDLUTF8 16 /* 0x10 Match "" at any word non-boundary */ -#define GPOS 17 /* 0x11 Matches where last m//g left off. */ -#define REG_ANY 18 /* 0x12 Match any one character (except newline). */ -#define ANYUTF8 19 /* 0x13 Match any one Unicode character (except newline). */ -#define SANY 20 /* 0x14 Match any one character. */ -#define SANYUTF8 21 /* 0x15 Match any one Unicode character. */ -#define ANYOF 22 /* 0x16 Match character in (or not in) this class. */ -#define ANYOFUTF8 23 /* 0x17 Match character in (or not in) this class. */ -#define ALNUM 24 /* 0x18 Match any alphanumeric character */ -#define ALNUMUTF8 25 /* 0x19 Match any alphanumeric character in utf8 */ -#define ALNUML 26 /* 0x1a Match any alphanumeric char in locale */ -#define ALNUMLUTF8 27 /* 0x1b Match any alphanumeric char in locale+utf8 */ -#define NALNUM 28 /* 0x1c Match any non-alphanumeric character */ -#define NALNUMUTF8 29 /* 0x1d Match any non-alphanumeric character in utf8 */ -#define NALNUML 30 /* 0x1e Match any non-alphanumeric char in locale */ -#define NALNUMLUTF8 31 /* 0x1f Match any non-alphanumeric char in locale+utf8 */ -#define SPACE 32 /* 0x20 Match any whitespace character */ -#define SPACEUTF8 33 /* 0x21 Match any whitespace character in utf8 */ -#define SPACEL 34 /* 0x22 Match any whitespace char in locale */ -#define SPACELUTF8 35 /* 0x23 Match any whitespace char in locale+utf8 */ -#define NSPACE 36 /* 0x24 Match any non-whitespace character */ -#define NSPACEUTF8 37 /* 0x25 Match any non-whitespace character in utf8 */ -#define NSPACEL 38 /* 0x26 Match any non-whitespace char in locale */ -#define NSPACELUTF8 39 /* 0x27 Match any non-whitespace char in locale+utf8 */ -#define DIGIT 40 /* 0x28 Match any numeric character */ -#define DIGITUTF8 41 /* 0x29 Match any numeric character in utf8 */ -#define DIGITL 42 /* 0x2a Match any numeric character in locale */ -#define DIGITLUTF8 43 /* 0x2b Match any numeric character in locale+utf8 */ -#define NDIGIT 44 /* 0x2c Match any non-numeric character */ -#define NDIGITUTF8 45 /* 0x2d Match any non-numeric character in utf8 */ -#define NDIGITL 46 /* 0x2e Match any non-numeric character in locale */ -#define NDIGITLUTF8 47 /* 0x2f Match any non-numeric character in locale+utf8 */ -#define CLUMP 48 /* 0x30 Match any combining character sequence */ -#define BRANCH 49 /* 0x31 Match this alternative, or the next... */ -#define BACK 50 /* 0x32 Match "", "next" ptr points backward. */ -#define EXACT 51 /* 0x33 Match this string (preceded by length). */ -#define EXACTF 52 /* 0x34 Match this string, folded (prec. by length). */ -#define EXACTFL 53 /* 0x35 Match this string, folded in locale (w/len). */ -#define NOTHING 54 /* 0x36 Match empty string. */ -#define TAIL 55 /* 0x37 Match empty string. Can jump here from outside. */ -#define STAR 56 /* 0x38 Match this (simple) thing 0 or more times. */ -#define PLUS 57 /* 0x39 Match this (simple) thing 1 or more times. */ -#define CURLY 58 /* 0x3a Match this simple thing {n,m} times. */ -#define CURLYN 59 /* 0x3b Match next-after-this simple thing */ -#define CURLYM 60 /* 0x3c Match this medium-complex thing {n,m} times. */ -#define CURLYX 61 /* 0x3d Match this complex thing {n,m} times. */ -#define WHILEM 62 /* 0x3e Do curly processing and see if rest matches. */ -#define OPEN 63 /* 0x3f Mark this point in input as start of #n. */ -#define CLOSE 64 /* 0x40 Analogous to OPEN. */ -#define REF 65 /* 0x41 Match some already matched string */ -#define REFF 66 /* 0x42 Match already matched string, folded */ -#define REFFL 67 /* 0x43 Match already matched string, folded in loc. */ -#define IFMATCH 68 /* 0x44 Succeeds if the following matches. */ -#define UNLESSM 69 /* 0x45 Fails if the following matches. */ -#define SUSPEND 70 /* 0x46 "Independent" sub-RE. */ -#define IFTHEN 71 /* 0x47 Switch, should be preceeded by switcher . */ -#define GROUPP 72 /* 0x48 Whether the group matched. */ -#define LONGJMP 73 /* 0x49 Jump far away. */ -#define BRANCHJ 74 /* 0x4a BRANCH with long offset. */ -#define EVAL 75 /* 0x4b Execute some Perl code. */ -#define MINMOD 76 /* 0x4c Next operator is not greedy. */ -#define LOGICAL 77 /* 0x4d Next opcode should set the flag only. */ -#define RENUM 78 /* 0x4e Group with independently numbered parens. */ -#define OPTIMIZED 79 /* 0x4f Placeholder for dump. */ +#define BOUNDL 10 /* 0xa Match "" at any word boundary */ +#define NBOUND 11 /* 0xb Match "" at any word non-boundary */ +#define NBOUNDL 12 /* 0xc Match "" at any word non-boundary */ +#define GPOS 13 /* 0xd Matches where last m//g left off. */ +#define REG_ANY 14 /* 0xe Match any one character (except newline). */ +#define SANY 15 /* 0xf Match any one character. */ +#define ANYOF 16 /* 0x10 Match character in (or not in) this class. */ +#define ALNUM 17 /* 0x11 Match any alphanumeric character */ +#define ALNUML 18 /* 0x12 Match any alphanumeric char in locale */ +#define NALNUM 19 /* 0x13 Match any non-alphanumeric character */ +#define NALNUML 20 /* 0x14 Match any non-alphanumeric char in locale */ +#define SPACE 21 /* 0x15 Match any whitespace character */ +#define SPACEL 22 /* 0x16 Match any whitespace char in locale */ +#define NSPACE 23 /* 0x17 Match any non-whitespace character */ +#define NSPACEL 24 /* 0x18 Match any non-whitespace char in locale */ +#define DIGIT 25 /* 0x19 Match any numeric character */ +#define DIGITL 26 /* 0x1a Match any numeric character in locale */ +#define NDIGIT 27 /* 0x1b Match any non-numeric character */ +#define NDIGITL 28 /* 0x1c Match any non-numeric character in locale */ +#define CLUMP 29 /* 0x1d Match any combining character sequence */ +#define BRANCH 30 /* 0x1e Match this alternative, or the next... */ +#define BACK 31 /* 0x1f Match "", "next" ptr points backward. */ +#define EXACT 32 /* 0x20 Match this string (preceded by length). */ +#define EXACTF 33 /* 0x21 Match this string, folded (prec. by length). */ +#define EXACTFL 34 /* 0x22 Match this string, folded in locale (w/len). */ +#define NOTHING 35 /* 0x23 Match empty string. */ +#define TAIL 36 /* 0x24 Match empty string. Can jump here from outside. */ +#define STAR 37 /* 0x25 Match this (simple) thing 0 or more times. */ +#define PLUS 38 /* 0x26 Match this (simple) thing 1 or more times. */ +#define CURLY 39 /* 0x27 Match this simple thing {n,m} times. */ +#define CURLYN 40 /* 0x28 Match next-after-this simple thing */ +#define CURLYM 41 /* 0x29 Match this medium-complex thing {n,m} times. */ +#define CURLYX 42 /* 0x2a Match this complex thing {n,m} times. */ +#define WHILEM 43 /* 0x2b Do curly processing and see if rest matches. */ +#define OPEN 44 /* 0x2c Mark this point in input as start of #n. */ +#define CLOSE 45 /* 0x2d Analogous to OPEN. */ +#define REF 46 /* 0x2e Match some already matched string */ +#define REFF 47 /* 0x2f Match already matched string, folded */ +#define REFFL 48 /* 0x30 Match already matched string, folded in loc. */ +#define IFMATCH 49 /* 0x31 Succeeds if the following matches. */ +#define UNLESSM 50 /* 0x32 Fails if the following matches. */ +#define SUSPEND 51 /* 0x33 "Independent" sub-RE. */ +#define IFTHEN 52 /* 0x34 Switch, should be preceeded by switcher . */ +#define GROUPP 53 /* 0x35 Whether the group matched. */ +#define LONGJMP 54 /* 0x36 Jump far away. */ +#define BRANCHJ 55 /* 0x37 BRANCH with long offset. */ +#define EVAL 56 /* 0x38 Execute some Perl code. */ +#define MINMOD 57 /* 0x39 Next operator is not greedy. */ +#define LOGICAL 58 /* 0x3a Next opcode should set the flag only. */ +#define RENUM 59 /* 0x3b Group with independently numbered parens. */ +#define OPTIMIZED 60 /* 0x3c Placeholder for dump. */ #ifndef DOINIT EXTCONST U8 PL_regkind[]; @@ -98,44 +79,25 @@ EXTCONST U8 PL_regkind[] = { EOL, /* MEOL */ EOL, /* SEOL */ BOUND, /* BOUND */ - BOUND, /* BOUNDUTF8 */ BOUND, /* BOUNDL */ - BOUND, /* BOUNDLUTF8 */ NBOUND, /* NBOUND */ - NBOUND, /* NBOUNDUTF8 */ NBOUND, /* NBOUNDL */ - NBOUND, /* NBOUNDLUTF8 */ GPOS, /* GPOS */ REG_ANY, /* REG_ANY */ - REG_ANY, /* ANYUTF8 */ REG_ANY, /* SANY */ - REG_ANY, /* SANYUTF8 */ ANYOF, /* ANYOF */ - ANYOF, /* ANYOFUTF8 */ ALNUM, /* ALNUM */ - ALNUM, /* ALNUMUTF8 */ ALNUM, /* ALNUML */ - ALNUM, /* ALNUMLUTF8 */ NALNUM, /* NALNUM */ - NALNUM, /* NALNUMUTF8 */ NALNUM, /* NALNUML */ - NALNUM, /* NALNUMLUTF8 */ SPACE, /* SPACE */ - SPACE, /* SPACEUTF8 */ SPACE, /* SPACEL */ - SPACE, /* SPACELUTF8 */ NSPACE, /* NSPACE */ - NSPACE, /* NSPACEUTF8 */ NSPACE, /* NSPACEL */ - NSPACE, /* NSPACELUTF8 */ DIGIT, /* DIGIT */ - DIGIT, /* DIGITUTF8 */ DIGIT, /* DIGITL */ - DIGIT, /* DIGITLUTF8 */ NDIGIT, /* NDIGIT */ - NDIGIT, /* NDIGITUTF8 */ NDIGIT, /* NDIGITL */ - NDIGIT, /* NDIGITLUTF8 */ CLUMP, /* CLUMP */ BRANCH, /* BRANCH */ BACK, /* BACK */ @@ -184,44 +146,25 @@ static const U8 regarglen[] = { 0, /* MEOL */ 0, /* SEOL */ 0, /* BOUND */ - 0, /* BOUNDUTF8 */ 0, /* BOUNDL */ - 0, /* BOUNDLUTF8 */ 0, /* NBOUND */ - 0, /* NBOUNDUTF8 */ 0, /* NBOUNDL */ - 0, /* NBOUNDLUTF8 */ 0, /* GPOS */ 0, /* REG_ANY */ - 0, /* ANYUTF8 */ 0, /* SANY */ - 0, /* SANYUTF8 */ 0, /* ANYOF */ - EXTRA_SIZE(struct regnode_1), /* ANYOFUTF8 */ 0, /* ALNUM */ - 0, /* ALNUMUTF8 */ 0, /* ALNUML */ - 0, /* ALNUMLUTF8 */ 0, /* NALNUM */ - 0, /* NALNUMUTF8 */ 0, /* NALNUML */ - 0, /* NALNUMLUTF8 */ 0, /* SPACE */ - 0, /* SPACEUTF8 */ 0, /* SPACEL */ - 0, /* SPACELUTF8 */ 0, /* NSPACE */ - 0, /* NSPACEUTF8 */ 0, /* NSPACEL */ - 0, /* NSPACELUTF8 */ 0, /* DIGIT */ - 0, /* DIGITUTF8 */ 0, /* DIGITL */ - 0, /* DIGITLUTF8 */ 0, /* NDIGIT */ - 0, /* NDIGITUTF8 */ 0, /* NDIGITL */ - 0, /* NDIGITLUTF8 */ 0, /* CLUMP */ 0, /* BRANCH */ 0, /* BACK */ @@ -267,44 +210,25 @@ static const char reg_off_by_arg[] = { 0, /* MEOL */ 0, /* SEOL */ 0, /* BOUND */ - 0, /* BOUNDUTF8 */ 0, /* BOUNDL */ - 0, /* BOUNDLUTF8 */ 0, /* NBOUND */ - 0, /* NBOUNDUTF8 */ 0, /* NBOUNDL */ - 0, /* NBOUNDLUTF8 */ 0, /* GPOS */ 0, /* REG_ANY */ - 0, /* ANYUTF8 */ 0, /* SANY */ - 0, /* SANYUTF8 */ 0, /* ANYOF */ - 0, /* ANYOFUTF8 */ 0, /* ALNUM */ - 0, /* ALNUMUTF8 */ 0, /* ALNUML */ - 0, /* ALNUMLUTF8 */ 0, /* NALNUM */ - 0, /* NALNUMUTF8 */ 0, /* NALNUML */ - 0, /* NALNUMLUTF8 */ 0, /* SPACE */ - 0, /* SPACEUTF8 */ 0, /* SPACEL */ - 0, /* SPACELUTF8 */ 0, /* NSPACE */ - 0, /* NSPACEUTF8 */ 0, /* NSPACEL */ - 0, /* NSPACELUTF8 */ 0, /* DIGIT */ - 0, /* DIGITUTF8 */ 0, /* DIGITL */ - 0, /* DIGITLUTF8 */ 0, /* NDIGIT */ - 0, /* NDIGITUTF8 */ 0, /* NDIGITL */ - 0, /* NDIGITLUTF8 */ 0, /* CLUMP */ 0, /* BRANCH */ 0, /* BACK */ @@ -351,79 +275,60 @@ static const char * const reg_name[] = { "MEOL", /* 0x7 */ "SEOL", /* 0x8 */ "BOUND", /* 0x9 */ - "BOUNDUTF8", /* 0xa */ - "BOUNDL", /* 0xb */ - "BOUNDLUTF8", /* 0xc */ - "NBOUND", /* 0xd */ - "NBOUNDUTF8", /* 0xe */ - "NBOUNDL", /* 0xf */ - "NBOUNDLUTF8", /* 0x10 */ - "GPOS", /* 0x11 */ - "REG_ANY", /* 0x12 */ - "ANYUTF8", /* 0x13 */ - "SANY", /* 0x14 */ - "SANYUTF8", /* 0x15 */ - "ANYOF", /* 0x16 */ - "ANYOFUTF8", /* 0x17 */ - "ALNUM", /* 0x18 */ - "ALNUMUTF8", /* 0x19 */ - "ALNUML", /* 0x1a */ - "ALNUMLUTF8", /* 0x1b */ - "NALNUM", /* 0x1c */ - "NALNUMUTF8", /* 0x1d */ - "NALNUML", /* 0x1e */ - "NALNUMLUTF8", /* 0x1f */ - "SPACE", /* 0x20 */ - "SPACEUTF8", /* 0x21 */ - "SPACEL", /* 0x22 */ - "SPACELUTF8", /* 0x23 */ - "NSPACE", /* 0x24 */ - "NSPACEUTF8", /* 0x25 */ - "NSPACEL", /* 0x26 */ - "NSPACELUTF8", /* 0x27 */ - "DIGIT", /* 0x28 */ - "DIGITUTF8", /* 0x29 */ - "DIGITL", /* 0x2a */ - "DIGITLUTF8", /* 0x2b */ - "NDIGIT", /* 0x2c */ - "NDIGITUTF8", /* 0x2d */ - "NDIGITL", /* 0x2e */ - "NDIGITLUTF8", /* 0x2f */ - "CLUMP", /* 0x30 */ - "BRANCH", /* 0x31 */ - "BACK", /* 0x32 */ - "EXACT", /* 0x33 */ - "EXACTF", /* 0x34 */ - "EXACTFL", /* 0x35 */ - "NOTHING", /* 0x36 */ - "TAIL", /* 0x37 */ - "STAR", /* 0x38 */ - "PLUS", /* 0x39 */ - "CURLY", /* 0x3a */ - "CURLYN", /* 0x3b */ - "CURLYM", /* 0x3c */ - "CURLYX", /* 0x3d */ - "WHILEM", /* 0x3e */ - "OPEN", /* 0x3f */ - "CLOSE", /* 0x40 */ - "REF", /* 0x41 */ - "REFF", /* 0x42 */ - "REFFL", /* 0x43 */ - "IFMATCH", /* 0x44 */ - "UNLESSM", /* 0x45 */ - "SUSPEND", /* 0x46 */ - "IFTHEN", /* 0x47 */ - "GROUPP", /* 0x48 */ - "LONGJMP", /* 0x49 */ - "BRANCHJ", /* 0x4a */ - "EVAL", /* 0x4b */ - "MINMOD", /* 0x4c */ - "LOGICAL", /* 0x4d */ - "RENUM", /* 0x4e */ - "OPTIMIZED", /* 0x4f */ + "BOUNDL", /* 0xa */ + "NBOUND", /* 0xb */ + "NBOUNDL", /* 0xc */ + "GPOS", /* 0xd */ + "REG_ANY", /* 0xe */ + "SANY", /* 0xf */ + "ANYOF", /* 0x10 */ + "ALNUM", /* 0x11 */ + "ALNUML", /* 0x12 */ + "NALNUM", /* 0x13 */ + "NALNUML", /* 0x14 */ + "SPACE", /* 0x15 */ + "SPACEL", /* 0x16 */ + "NSPACE", /* 0x17 */ + "NSPACEL", /* 0x18 */ + "DIGIT", /* 0x19 */ + "DIGITL", /* 0x1a */ + "NDIGIT", /* 0x1b */ + "NDIGITL", /* 0x1c */ + "CLUMP", /* 0x1d */ + "BRANCH", /* 0x1e */ + "BACK", /* 0x1f */ + "EXACT", /* 0x20 */ + "EXACTF", /* 0x21 */ + "EXACTFL", /* 0x22 */ + "NOTHING", /* 0x23 */ + "TAIL", /* 0x24 */ + "STAR", /* 0x25 */ + "PLUS", /* 0x26 */ + "CURLY", /* 0x27 */ + "CURLYN", /* 0x28 */ + "CURLYM", /* 0x29 */ + "CURLYX", /* 0x2a */ + "WHILEM", /* 0x2b */ + "OPEN", /* 0x2c */ + "CLOSE", /* 0x2d */ + "REF", /* 0x2e */ + "REFF", /* 0x2f */ + "REFFL", /* 0x30 */ + "IFMATCH", /* 0x31 */ + "UNLESSM", /* 0x32 */ + "SUSPEND", /* 0x33 */ + "IFTHEN", /* 0x34 */ + "GROUPP", /* 0x35 */ + "LONGJMP", /* 0x36 */ + "BRANCHJ", /* 0x37 */ + "EVAL", /* 0x38 */ + "MINMOD", /* 0x39 */ + "LOGICAL", /* 0x3a */ + "RENUM", /* 0x3b */ + "OPTIMIZED", /* 0x3c */ }; -static const int reg_num = 80; +static const int reg_num = 61; #endif /* DEBUGGING */ #endif /* REG_COMP_C */ diff --git a/sv.c b/sv.c index 1dafbf6..1fbf83f 100644 --- a/sv.c +++ b/sv.c @@ -4522,11 +4522,9 @@ Perl_sv_len_utf8(pTHX_ register SV *sv) if (!sv) return 0; -#ifdef NOTYET if (SvGMAGICAL(sv)) return mg_length(sv); else -#endif { STRLEN len; U8 *s = (U8*)SvPV(sv, len); diff --git a/t/base/commonsense.t b/t/base/commonsense.t index 155c534..6e31307 100644 --- a/t/base/commonsense.t +++ b/t/base/commonsense.t @@ -15,7 +15,8 @@ if (($Config{'extensions'} !~ /\bIO\b/) ){ print "Bail out! Perl configured without IO module\n"; exit 0; } -if (($Config{'extensions'} !~ /\bFile\/Glob\b/) ){ +# hey, DOS users do not need this kind of common sense ;-) +if ($^O ne 'dos' && ($Config{'extensions'} !~ /\bFile\/Glob\b/) ){ print "Bail out! Perl configured without File::Glob module\n"; exit 0; } diff --git a/t/lib/glob-basic.t b/t/lib/glob-basic.t index e8a2905..be3280c 100755 --- a/t/lib/glob-basic.t +++ b/t/lib/glob-basic.t @@ -39,7 +39,7 @@ print "ok 2\n"; # look up the user's home directory # should return a list with one item, and not set ERROR -if ($^O ne 'MSWin32' && $^O ne 'VMS') { +if ($^O ne 'MSWin32' && $^O ne 'VMS' && $^O ne 'cygwin') { eval { ($name, $home) = (getpwuid($>))[0,7]; 1; diff --git a/t/op/64bitint.t b/t/op/64bitint.t index 88fbc55..47779dd 100644 --- a/t/op/64bitint.t +++ b/t/op/64bitint.t @@ -16,7 +16,7 @@ BEGIN { # 32+ bit integers don't cause noise no warnings qw(overflow portable); -print "1..55\n"; +print "1..57\n"; my $q = 12345678901; my $r = 23456789012; @@ -294,4 +294,30 @@ $q = 18446744073709551615; print "# $q ne\n# 18446744073709551615\nnot " unless "$q" eq "18446744073709551615"; print "ok 55\n"; +# Test that sv_2nv then sv_2iv is the same as sv_2iv direct +# fails if whatever Atol is defined as can't actually cope with >32 bits. +my $num = 4294967297; +my $string = "4294967297"; +{ + use integer; + $num += 0; + $string += 0; +} +if ($num eq $string) { + print "ok 56\n"; +} else { + print "not ok 56 # \"$num\" ne \"$string\"\n"; +} + +# Test that sv_2nv then sv_2uv is the same as sv_2uv direct +$num = 4294967297; +$string = "4294967297"; +$num &= 0; +$string &= 0; +if ($num eq $string) { + print "ok 57\n"; +} else { + print "not ok 57 # \"$num\" ne \"$string\"\n"; +} + # eof diff --git a/t/op/goto_xs.t b/t/op/goto_xs.t index cf2cafd..dc8e7d7 100755 --- a/t/op/goto_xs.t +++ b/t/op/goto_xs.t @@ -35,7 +35,7 @@ $VALID = 'LOCK_SH'; ### First, we check whether Fcntl::constant returns sane answers. # Fcntl::constant("LOCK_SH",0) should always succeed. -$value = Fcntl::constant($VALID,0); +$value = Fcntl::constant($VALID); print((!defined $value) ? "not ok 1\n# Sanity check broke, remaining tests will fail.\n" : "ok 1\n"); @@ -45,20 +45,20 @@ print((!defined $value) # test "goto &function_constant" sub goto_const { goto &Fcntl::constant; } -$ret = goto_const($VALID,0); +$ret = goto_const($VALID); print(($ret == $value) ? "ok 2\n" : "not ok 2\n# ($ret != $value)\n"); # test "goto &$function_package_and_name" $FNAME1 = 'Fcntl::constant'; sub goto_name1 { goto &$FNAME1; } -$ret = goto_name1($VALID,0); +$ret = goto_name1($VALID); print(($ret == $value) ? "ok 3\n" : "not ok 3\n# ($ret != $value)\n"); # test "goto &$function_package_and_name" again, with dirtier stack -$ret = goto_name1($VALID,0); +$ret = goto_name1($VALID); print(($ret == $value) ? "ok 4\n" : "not ok 4\n# ($ret != $value)\n"); -$ret = goto_name1($VALID,0); +$ret = goto_name1($VALID); print(($ret == $value) ? "ok 5\n" : "not ok 5\n# ($ret != $value)\n"); # test "goto &$function_name" from local package @@ -67,14 +67,14 @@ $FNAME2 = 'constant'; sub goto_name2 { goto &$FNAME2; } package main; -$ret = Fcntl::goto_name2($VALID,0); +$ret = Fcntl::goto_name2($VALID); print(($ret == $value) ? "ok 6\n" : "not ok 6\n# ($ret != $value)\n"); # test "goto &$function_ref" $FREF = \&Fcntl::constant; sub goto_ref { goto &$FREF; } -$ret = goto_ref($VALID,0); +$ret = goto_ref($VALID); print(($ret == $value) ? "ok 7\n" : "not ok 7\n# ($ret != $value)\n"); ### tests where the args are not on stack but in GvAV(defgv) (ie, @_) @@ -82,17 +82,17 @@ print(($ret == $value) ? "ok 7\n" : "not ok 7\n# ($ret != $value)\n"); # test "goto &function_constant" from a sub called without arglist sub call_goto_const { &goto_const; } -$ret = call_goto_const($VALID,0); +$ret = call_goto_const($VALID); print(($ret == $value) ? "ok 8\n" : "not ok 8\n# ($ret != $value)\n"); # test "goto &$function_package_and_name" from a sub called without arglist sub call_goto_name1 { &goto_name1; } -$ret = call_goto_name1($VALID,0); +$ret = call_goto_name1($VALID); print(($ret == $value) ? "ok 9\n" : "not ok 9\n# ($ret != $value)\n"); # test "goto &$function_ref" from a sub called without arglist sub call_goto_ref { &goto_ref; } -$ret = call_goto_ref($VALID,0); +$ret = call_goto_ref($VALID); print(($ret == $value) ? "ok 10\n" : "not ok 10\n# ($ret != $value)\n"); diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t index ac42b85..cd9d56a 100644 --- a/t/op/utf8decode.t +++ b/t/op/utf8decode.t @@ -5,6 +5,8 @@ BEGIN { @INC = '../lib'; } +no utf8; # this test contains raw 8-bit data on purpose; don't switch to \x{} + print "1..78\n"; my $test = 1; diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t index 6986720..89416dc 100755 --- a/t/pragma/utf8.t +++ b/t/pragma/utf8.t @@ -10,7 +10,7 @@ BEGIN { } } -print "1..90\n"; +print "1..104\n"; my $test = 1; @@ -42,6 +42,7 @@ sub nok_bytes { { use utf8; + $_ = ">\x{263A}<"; s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; ok $_, '>☺<'; @@ -106,212 +107,191 @@ sub nok_bytes { } { - use utf8; - - $_ = "\x{263A}>\x{263A}\x{263A}"; - - ok length, 4; - $test++; # 13 - - ok length((m/>(.)/)[0]), 1; - $test++; # 14 - - ok length($&), 2; - $test++; # 15 + # no use utf8 needed + $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}"; + + ok length($_), 6; # 13 + $test++; - ok length($'), 1; - $test++; # 16 + ($a) = m/x(.)/; - ok length($`), 1; - $test++; # 17 + ok length($a), 1; # 14 + $test++; - ok length($1), 1; - $test++; # 18 + ok length($`), 2; # 15 + $test++; + ok length($&), 2; # 16 + $test++; + ok length($'), 2; # 17 + $test++; - ok length($tmp=$&), 2; - $test++; # 19 + ok length($1), 1; # 18 + $test++; - ok length($tmp=$'), 1; - $test++; # 20 + ok length($b=$`), 2; # 19 + $test++; - ok length($tmp=$`), 1; - $test++; # 21 + ok length($b=$&), 2; # 20 + $test++; - ok length($tmp=$1), 1; - $test++; # 22 + ok length($b=$'), 2; # 21 + $test++; - { - use bytes; + ok length($b=$1), 1; # 22 + $test++; - my $tmp = $&; - ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272); - $test++; # 23 + ok $a, "\x{263A}"; # 23 + $test++; - $tmp = $'; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 24 + ok $`, "\x{263A}\x{263A}"; # 24 + $test++; - $tmp = $`; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 25 + ok $&, "x\x{263A}"; # 25 + $test++; - $tmp = $1; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 26 - } + ok $', "y\x{263A}"; # 26 + $test++; - ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272); - $test++; # 27 + ok $1, "\x{263A}"; # 27 + $test++; - ok_bytes $', pack("C*", 0342, 0230, 0272); - $test++; # 28 + ok_bytes $a, "\342\230\272"; # 28 + $test++; - ok_bytes $`, pack("C*", 0342, 0230, 0272); - $test++; # 29 + ok_bytes $1, "\342\230\272"; # 29 + $test++; - ok_bytes $1, pack("C*", 0342, 0230, 0272); - $test++; # 30 + ok_bytes $&, "x\342\230\272"; # 30 + $test++; { - use bytes; - no utf8; - - ok length, 10; - $test++; # 31 + use utf8; # required + $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A); + } - ok length((m/>(.)/)[0]), 1; - $test++; # 32 + ok length($_), 6; # 31 + $test++; - ok length($&), 2; - $test++; # 33 + ($a) = m/x(.)/; - ok length($'), 5; - $test++; # 34 + ok length($a), 1; # 32 + $test++; - ok length($`), 3; - $test++; # 35 + ok length($`), 2; # 33 + $test++; - ok length($1), 1; - $test++; # 36 + ok length($&), 2; # 34 + $test++; - ok $&, pack("C*", ord(">"), 0342); - $test++; # 37 + ok length($'), 2; # 35 + $test++; - ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); - $test++; # 38 + ok length($1), 1; # 36 + $test++; - ok $`, pack("C*", 0342, 0230, 0272); - $test++; # 39 + ok length($b=$`), 2; # 37 + $test++; - ok $1, pack("C*", 0342); - $test++; # 40 - } + ok length($b=$&), 2; # 38 + $test++; - { - no utf8; - $_="\342\230\272>\342\230\272\342\230\272"; - } + ok length($b=$'), 2; # 39 + $test++; - ok length, 10; - $test++; # 41 + ok length($b=$1), 1; # 40 + $test++; - ok length((m/>(.)/)[0]), 1; - $test++; # 42 + ok $a, "\x{263A}"; # 41 + $test++; - ok length($&), 2; - $test++; # 43 + ok $`, "\x{263A}\x{263A}"; # 42 + $test++; - ok length($'), 1; - $test++; # 44 + ok $&, "x\x{263A}"; # 43 + $test++; - ok length($`), 1; - $test++; # 45 + ok $', "y\x{263A}"; # 44 + $test++; - ok length($1), 1; - $test++; # 46 + ok $1, "\x{263A}"; # 45 + $test++; - ok length($tmp=$&), 2; - $test++; # 47 + ok_bytes $a, "\342\230\272"; # 46 + $test++; - ok length($tmp=$'), 1; - $test++; # 48 + ok_bytes $1, "\342\230\272"; # 47 + $test++; - ok length($tmp=$`), 1; - $test++; # 49 + ok_bytes $&, "x\342\230\272"; # 48 + $test++; - ok length($tmp=$1), 1; - $test++; # 50 + $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272"; - { - use bytes; + ok length($_), 14; # 49 + $test++; - my $tmp = $&; - ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272); - $test++; # 51 + ($a) = m/x(.)/; - $tmp = $'; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 52 + ok length($a), 1; # 50 + $test++; - $tmp = $`; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 53 + ok length($`), 6; # 51 + $test++; - $tmp = $1; - ok $tmp, pack("C*", 0342, 0230, 0272); - $test++; # 54 - } + ok length($&), 2; # 52 + $test++; - { - use bytes; - no utf8; + ok length($'), 6; # 53 + $test++; - ok length, 10; - $test++; # 55 + ok length($1), 1; # 54 + $test++; - ok length((m/>(.)/)[0]), 1; - $test++; # 56 + ok length($b=$`), 6; # 55 + $test++; - ok length($&), 2; - $test++; # 57 + ok length($b=$&), 2; # 56 + $test++; - ok length($'), 5; - $test++; # 58 + ok length($b=$'), 6; # 57 + $test++; - ok length($`), 3; - $test++; # 59 + ok length($b=$1), 1; # 58 + $test++; - ok length($1), 1; - $test++; # 60 + ok $a, "\342"; # 59 + $test++; - ok $&, pack("C*", ord(">"), 0342); - $test++; # 61 + ok $`, "\342\230\272\342\230\272"; # 60 + $test++; - ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); - $test++; # 62 + ok $&, "x\342"; # 61 + $test++; - ok $`, pack("C*", 0342, 0230, 0272); - $test++; # 63 + ok $', "\230\272y\342\230\272"; # 62 + $test++; - ok $1, pack("C*", 0342); - $test++; # 64 - } + ok $1, "\342"; # 63 + $test++; +} +{ + use utf8; ok "\x{ab}" =~ /^\x{ab}$/, 1; - $test++; # 65 + $test++; # 64 } { use utf8; ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2); - $test++; # 66 + $test++; # 65 } { use utf8; my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); ok "@a", "1234 123 2345"; - $test++; # 67 + $test++; # 66 } { @@ -319,7 +299,7 @@ sub nok_bytes { my $x = chr(123); my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); ok "@a", "1234 2345"; - $test++; # 68 + $test++; # 67 } { @@ -331,10 +311,10 @@ sub nok_bytes { { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8 print "not " if $a eq $b; - print "ok $test\n"; $test++; + print "ok $test\n"; $test++; # 68 { use utf8; print "not " if $a eq $b; } - print "ok $test\n"; $test++; + print "ok $test\n"; $test++; # 69 } { @@ -344,7 +324,7 @@ sub nok_bytes { for (@x) { s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; my($latin) = /^(.+)(?:\s+\d)/; - print $latin eq "stra\337e" ? "ok $test\n" : + print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71 "#latin[$latin]\nnot ok $test\n"; $test++; $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a @@ -369,7 +349,7 @@ sub nok_bytes { } print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; - print "ok $test\n"; + print "ok $test\n"; # 72 $test++; } @@ -384,27 +364,27 @@ sub nok_bytes { print "not " unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; print "ok $test\n"; - $test++; + $test++; # 73 my ($a, $b) = split(/\x{100}/, $s); print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; print "ok $test\n"; - $test++; + $test++; # 74 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; print "ok $test\n"; - $test++; + $test++; # 75 my ($a, $b) = split(/\x40\x{80}/, $s); print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; print "ok $test\n"; - $test++; + $test++; # 76 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; print "ok $test\n"; - $test++; + $test++; # 77 } { @@ -414,14 +394,14 @@ sub nok_bytes { my $smiley = "\x{263a}"; - for my $s ("\x{263a}", # 1 - $smiley, # 2 + for my $s ("\x{263a}", # 78 + $smiley, # 79 - "" . $smiley, # 3 - "" . "\x{263a}", # 4 + "" . $smiley, # 80 + "" . "\x{263a}", # 81 - $smiley . "", # 5 - "\x{263a}" . "", # 6 + $smiley . "", # 82 + "\x{263a}" . "", # 83 ) { my $length_chars = length($s); my $length_bytes; @@ -437,14 +417,14 @@ sub nok_bytes { $test++; } - for my $s ("\x{263a}" . "\x{263a}", # 7 - $smiley . $smiley, # 8 + for my $s ("\x{263a}" . "\x{263a}", # 84 + $smiley . $smiley, # 85 - "\x{263a}\x{263a}", # 9 - "$smiley$smiley", # 10 + "\x{263a}\x{263a}", # 86 + "$smiley$smiley", # 87 - "\x{263a}" x 2, # 11 - $smiley x 2, # 12 + "\x{263a}" x 2, # 88 + $smiley x 2, # 89 ) { my $length_chars = length($s); my $length_bytes; @@ -460,3 +440,106 @@ sub nok_bytes { $test++; } } + +{ + use utf8; + + print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + print "ok $test\n"; + $test++; # 90 + + print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + print "ok $test\n"; + $test++; # 91 + + print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + print "ok $test\n"; + $test++; # 92 + + print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + print "ok $test\n"; + $test++; # 93 + + print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + print "ok $test\n"; + $test++; # 94 + + print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + print "ok $test\n"; + $test++; # 95 + + print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + print "ok $test\n"; + $test++; # 96 + + print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + print "ok $test\n"; + $test++; # 97 +} + +{ + # the first half of 20001028.003 + + my $X = chr(1448); + my ($Y) = $X =~ /(.*)/; + print "not " unless length $Y == 1; + print "ok $test\n"; + $test++; # 98 +} + +{ + # 20001108.001 + + use utf8; + my $X = "Szab\x{f3},Bal\x{e1}zs"; + my $Y = $X; + $Y =~ s/(B)/$1/ for 0..3; + print "not " unless $Y eq $X; + print "ok $test\n"; + $test++; # 99 +} + +{ + # 20001114.001 + + use utf8; + use charnames ':full'; + my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}"; + print "not " unless ord($text) == 0xc4; + print "ok $test\n"; + $test++; # 100 +} + +{ + # 20001205.014 + + use utf8; + + my $a = "ABC\x{263A}"; + + my @b = split( //, $a ); + + print "not " unless @b == 4; + print "ok $test\n"; + $test++; # 101 + + print "not " unless length($b[3]) == 1; + print "ok $test\n"; + $test++; # 102 + + $a =~ s/^A/Z/; + print "not " unless length($a) == 4; + print "ok $test\n"; + $test++; # 103 +} + +{ + # the second half of 20001028.003 + + use utf8; + $X =~ s/^/chr(1488)/e; + print "not " unless length $X == 1; + print "ok $test\n"; + $test++; # 104 +} + diff --git a/t/pragma/warn/pp_sys b/t/pragma/warn/pp_sys index 66f3e75..e30637b 100644 --- a/t/pragma/warn/pp_sys +++ b/t/pragma/warn/pp_sys @@ -3,6 +3,15 @@ untie attempted while %d inner references still exist [pp_untie] sub TIESCALAR { bless [] } ; tie $a, 'main'; untie $a ; + fileno() on unopened filehandle abc [pp_fileno] + $a = "abc"; fileno($a) + + binmode() on unopened filehandle abc [pp_binmode] + $a = "abc"; fileno($a) + + printf() on unopened filehandle abc [pp_prtf] + $a = "abc"; printf $a "fred" + Filehandle %s opened only for input [pp_leavewrite] format STDIN = . @@ -400,3 +409,11 @@ close F ; unlink $file ; EXPECT Filehandle F opened only for output at - line 12. +######## +# pp_sys.c [pp_binmode] +use warnings 'unopened' ; +binmode(BLARG); +$a = "BLERG";binmode($a); +EXPECT +binmode() on unopened filehandle BLARG at - line 3. +binmode() on unopened filehandle at - line 4. diff --git a/uconfig.h b/uconfig.h index e547a9f..9a21350 100644 --- a/uconfig.h +++ b/uconfig.h @@ -1053,8 +1053,8 @@ * This symbol contains the ~name expanded version of ARCHLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -/*#define ARCHLIB "/usr/local/lib/perl5/5.6/unknown" / **/ -/*#define ARCHLIB_EXP "/usr/local/lib/perl5/5.6/unknown" / **/ +/*#define ARCHLIB "/usr/local/lib/perl5/5.7/unknown" / **/ +/*#define ARCHLIB_EXP "/usr/local/lib/perl5/5.7/unknown" / **/ /* ARCHNAME: * This symbol holds a string representing the architecture name. @@ -1192,6 +1192,12 @@ #define CPPRUN "" #define CPPLAST "" +/* HAS__FWALK: + * This symbol, if defined, indicates that the _fwalk system call is + * available to apply a function to all the file handles. + */ +/*#define HAS__FWALK / **/ + /* HAS_ACCESS: * This manifest constant lets the C program know that the access() * system call is available to check for accessibility using real UID/GID. @@ -1289,6 +1295,13 @@ */ /*#define HAS_ENDSERVENT / **/ +/* FCNTL_CAN_LOCK: + * This symbol, if defined, indicates that fcntl() can be used + * for file locking. Normally on Unix systems this is defined. + * It may be undefined on VMS. + */ +/*#define FCNTL_CAN_LOCK / **/ + /* HAS_FD_SET: * This symbol, when defined, indicates presence of the fd_set typedef * in @@ -1331,6 +1344,13 @@ */ /*#define HAS_FSTATFS / **/ +/* HAS_FSYNC: + * This symbol, if defined, indicates that the fsync routine is + * available to write a file's modified data and attributes to + * permanent storage. + */ +/*#define HAS_FSYNC / **/ + /* HAS_FTELLO: * This symbol, if defined, indicates that the ftello routine is * available to ftell beyond 32 bits (useful for ILP32 hosts). @@ -1484,6 +1504,17 @@ */ /*#define HAS_GETPROTOENT / **/ +/* HAS_GETPGRP: + * This symbol, if defined, indicates that the getpgrp routine is + * available to get the current process group. + */ +/* USE_BSD_GETPGRP: + * This symbol, if defined, indicates that getpgrp needs one + * arguments whereas USG one needs none. + */ +/*#define HAS_GETPGRP / **/ +/*#define USE_BSD_GETPGRP / **/ + /* HAS_GETPROTOBYNAME: * This symbol, if defined, indicates that the getprotobyname() * routine is available to look up protocols by their name. @@ -1793,6 +1824,15 @@ */ /*#define HAS_SANE_MEMCMP / **/ +/* HAS_SBRK_PROTO: + * This symbol, if defined, indicates that the system provides + * a prototype for the sbrk() function. Otherwise, it is up + * to the program to supply one. Good guesses are + * extern void* sbrk _((int)); + * extern void* sbrk _((size_t)); + */ +/*#define HAS_SBRK_PROTO / **/ + /* HAS_SEM: * This symbol, if defined, indicates that the entire sem*(2) library is * supported. @@ -1830,6 +1870,18 @@ */ /*#define HAS_SETPROTOENT / **/ +/* HAS_SETPGRP: + * This symbol, if defined, indicates that the setpgrp routine is + * available to set the current process group. + */ +/* USE_BSD_SETPGRP: + * This symbol, if defined, indicates that setpgrp needs two + * arguments whereas USG one needs none. See also HAS_SETPGID + * for a POSIX interface. + */ +/*#define HAS_SETPGRP / **/ +/*#define USE_BSD_SETPGRP / **/ + /* HAS_SETPROCTITLE: * This symbol, if defined, indicates that the setproctitle routine is * available to set process title. @@ -2031,7 +2083,7 @@ /*#define USE_STDIO_PTR / **/ #ifdef USE_STDIO_PTR #define FILE_ptr(fp) ((fp)->_IO_read_ptr) -# STDIO_PTR_LVALUE /**/ +/*#define STDIO_PTR_LVALUE / **/ #define FILE_cnt(fp) ((fp)->_IO_read_end - (fp)->_IO_read_ptr) /*#define STDIO_CNT_LVALUE / **/ /*#define STDIO_PTR_LVAL_SETS_CNT / **/ @@ -2099,13 +2151,13 @@ * This symbol, if defined, indicates that the strtoq routine is * available to convert strings to long longs (quads). */ -# HAS_STRTOQ /**/ +/*#define HAS_STRTOQ / **/ -/* HAS_STRTOQ: - * This symbol, if defined, indicates that the strtoq routine is - * available to convert strings to long longs (quads). +/* HAS_STRTOUL: + * This symbol, if defined, indicates that the strtoul routine is + * available to provide conversion of strings to unsigned long. */ -# HAS_STRTOQ /**/ +/*#define HAS_STRTOUL / **/ /* HAS_STRTOULL: * This symbol, if defined, indicates that the strtoull routine is @@ -2597,6 +2649,17 @@ #define RD_NODATA -1 #undef EOF_NONBLOCK +/* NEED_VA_COPY: + * This symbol, if defined, indicates that the system stores + * the variable argument list datatype, va_list, in a format + * that cannot be copied by simple assignment, so that some + * other means must be used when copying is required. + * As such systems vary in their provision (or non-provision) + * of copying mechanisms, handy.h defines a platform- + * independent macro, Perl_va_copy(src, dst), to do the job. + */ +/*#define NEED_VA_COPY / **/ + /* Netdb_host_t: * This symbol holds the type used for the 1st argument * to gethostbyaddr(). @@ -2729,7 +2792,7 @@ #endif #define NVSIZE 8 /**/ #undef NV_PRESERVES_UV -#define NV_PRESERVES_UV_BITS +#define NV_PRESERVES_UV_BITS 0 /* IVdf: * This symbol defines the format string used for printing a Perl IV @@ -2784,8 +2847,8 @@ * This symbol contains the ~name expanded version of PRIVLIB, to be used * in programs that are not prepared to deal with ~ expansion at run-time. */ -#define PRIVLIB "/usr/local/lib/perl5/5.6" /**/ -#define PRIVLIB_EXP "/usr/local/lib/perl5/5.6" /**/ +#define PRIVLIB "/usr/local/lib/perl5/5.7" /**/ +#define PRIVLIB_EXP "/usr/local/lib/perl5/5.7" /**/ /* PTRSIZE: * This symbol contains the size of a pointer, so that the C preprocessor @@ -2946,6 +3009,12 @@ */ #define STARTPERL "" /**/ +/* STDCHAR: + * This symbol is defined to be the type of char used in stdio.h. + * It has the values "unsigned char" or "char". + */ +#define STDCHAR char /**/ + /* HAS_STDIO_STREAM_ARRAY: * This symbol, if defined, tells that there is an array * holding the stdio streams. @@ -3164,91 +3233,4 @@ #define PERL_XS_APIVERSION "5.005" #define PERL_PM_APIVERSION "5.005" -/* HAS_GETPGRP: - * This symbol, if defined, indicates that the getpgrp routine is - * available to get the current process group. - */ -/* USE_BSD_GETPGRP: - * This symbol, if defined, indicates that getpgrp needs one - * arguments whereas USG one needs none. - */ -/*#define HAS_GETPGRP / **/ -/*#define USE_BSD_GETPGRP / **/ - -/* HAS_SETPGRP: - * This symbol, if defined, indicates that the setpgrp routine is - * available to set the current process group. - */ -/* USE_BSD_SETPGRP: - * This symbol, if defined, indicates that setpgrp needs two - * arguments whereas USG one needs none. See also HAS_SETPGID - * for a POSIX interface. - */ -/*#define HAS_SETPGRP / **/ -/*#define USE_BSD_SETPGRP / **/ - -/* HAS_STRTOUL: - * This symbol, if defined, indicates that the strtoul routine is - * available to provide conversion of strings to unsigned long. - */ -/*#define HAS_STRTOUL / **/ - -/* STDCHAR: - * This symbol is defined to be the type of char used in stdio.h. - * It has the values "unsigned char" or "char". - */ -#define STDCHAR char /**/ - -/* HAS_STRTOUL: - * This symbol, if defined, indicates that the strtoul routine is - * available to provide conversion of strings to unsigned long. - */ -/*#define HAS_STRTOUL / **/ - -/* STDCHAR: - * This symbol is defined to be the type of char used in stdio.h. - * It has the values "unsigned char" or "char". - */ -#define STDCHAR char /**/ - -/* HAS__FWALK: - * This symbol, if defined, indicates that the _fwalk system call is - * available to apply a function to all the file handles. - */ -/*#define HAS__FWALK / **/ - -/* FCNTL_CAN_LOCK: - * This symbol, if defined, indicates that fcntl() can be used - * for file locking. Normally on Unix systems this is defined. - * It may be undefined on VMS. - */ -/*#define FCNTL_CAN_LOCK / **/ - -/* HAS_FSYNC: - * This symbol, if defined, indicates that the fsync routine is - * available to write a file's modified data and attributes to - * permanent storage. - */ -# HAS_FSYNC /**/ - -/* HAS_SBRK_PROTO: - * This symbol, if defined, indicates that the system provides - * a prototype for the sbrk() function. Otherwise, it is up - * to the program to supply one. Good guesses are - * extern void* sbrk _((int)); - * extern void* sbrk _((size_t)); - */ -/*#define HAS_SBRK_PROTO / **/ - -/* NEED_VA_COPY: - * This symbol, if defined, indicates that the system stores - * the variable argument list datatype, va_list, in a format - * that cannot be copied by simple assignment, so that some - * other means must be used when copying is required. - * As such systems vary in their provision (or non-provision) - * of copying mechanisms, handy.h defines a platform- - * independent macro, Perl_va_copy(src, dst), to do the job. - */ -/*#define NEED_VA_COPY / **/ - #endif diff --git a/uconfig.sh b/uconfig.sh index 0c8dffc..f6d64ae 100755 --- a/uconfig.sh +++ b/uconfig.sh @@ -4,8 +4,8 @@ _o='.o' afs='false' alignbytes='4' apiversion='5.005' -archlib='/usr/local/lib/perl5/5.6/unknown' -archlibexp='/usr/local/lib/perl5/5.6/unknown' +archlib='/usr/local/lib/perl5/5.7/unknown' +archlibexp='/usr/local/lib/perl5/5.7/unknown' archname='unknown' bin='/usr/local/bin' bincompat5005='define' @@ -15,9 +15,7 @@ charsize='1' clocktype='clock_t' cpp_stuff='42' crosscompile='undef' -d__fwalk='undef' d_Gconvert='sprintf((b),"%.*g",(n),(x))' -d_SCNfldbl='undef' d_PRIEUldbl='undef' d_PRIFUldbl='undef' d_PRIGUldbl='undef' @@ -30,6 +28,8 @@ d_PRIi64='undef' d_PRIo64='undef' d_PRIu64='undef' d_PRIx64='undef' +d_SCNfldbl='undef' +d__fwalk='undef' d_access='undef' d_accessx='undef' d_alarm='undef' @@ -71,7 +71,6 @@ d_endnent='undef' d_endpent='undef' d_endpwent='undef' d_endsent='undef' -d_endspent='undef' d_eofnblk='undef' d_eunice='undef' d_fchmod='undef' @@ -87,14 +86,18 @@ d_flock='undef' d_fork='define' d_fpathconf='undef' d_fpos64_t='undef' +d_frexpl='undef' d_fs_data_s='undef' d_fseeko='undef' d_fsetpos='undef' d_fstatfs='undef' d_fstatvfs='undef' +d_fsync='undef' d_ftello='undef' d_ftime='undef' d_getcwd='undef' +d_getespwnam='undef' +d_getfsstat='undef' d_getgrent='undef' d_getgrps='undef' d_gethbyaddr='undef' @@ -119,6 +122,7 @@ d_getpgrp='undef' d_getppid='undef' d_getprior='undef' d_getprotoprotos='undef' +d_getprpwnam='undef' d_getpwent='undef' d_getsbyname='undef' d_getsbyport='undef' @@ -134,8 +138,10 @@ d_htonl='undef' d_iconv='undef' d_index='undef' d_inetaton='undef' -d_int64t='undef' +d_int64_t='undef' d_isascii='undef' +d_isnan='undef' +d_isnanl='undef' d_killpg='undef' d_lchown='undef' d_ldbl_dig='undef' @@ -144,7 +150,9 @@ d_locconv='undef' d_lockf='undef' d_longdbl='undef' d_longlong='undef' +d_lseekproto='undef' d_lstat='undef' +d_madvise='undef' d_mblen='undef' d_mbstowcs='undef' d_mbtowc='undef' @@ -159,6 +167,8 @@ d_mkfifo='undef' d_mkstemp='undef' d_mkstemps='undef' d_mktime='undef' +d_mmap='undef' +d_modfl='undef' d_mprotect='undef' d_msg='undef' d_msg_ctrunc='undef' @@ -175,6 +185,7 @@ d_munmap='undef' d_mymalloc='undef' d_nice='undef' d_nv_preserves_uv='undef' +d_nv_preserves_uv_bits='0' d_off64_t='undef' d_old_pthread_create_joinable='undef' d_oldpthreads='undef' @@ -182,6 +193,7 @@ d_oldsock='undef' d_open3='undef' d_pathconf='undef' d_pause='undef' +d_perl_otherlibdirs='undef' d_phostname='undef' d_pipe='undef' d_poll='undef' @@ -195,6 +207,7 @@ d_pwexpire='undef' d_pwgecos='undef' d_pwpasswd='undef' d_pwquota='undef' +d_qgcvt='undef' d_quad='undef' d_readdir='undef' d_readlink='undef' @@ -238,7 +251,6 @@ d_setrgid='undef' d_setruid='undef' d_setsent='undef' d_setsid='undef' -d_setspent='undef' d_setvbuf='undef' d_sfio='undef' d_shm='undef' @@ -250,6 +262,7 @@ d_shmget='undef' d_sigaction='undef' d_sigsetjmp='undef' d_socket='undef' +d_socklen_t='undef' d_sockpair='undef' d_socks5_init='undef' d_sqrtl='undef' @@ -258,8 +271,9 @@ d_statfs_f_flags='undef' d_statfs_s='undef' d_statvfs='undef' d_stdio_cnt_lval='undef' -d_stdio_ptr_lval_sets_cnt='undef' +d_stdio_ptr_lval='undef' d_stdio_ptr_lval_nochange_cnt='undef' +d_stdio_ptr_lval_sets_cnt='undef' d_stdio_stream_array='undef' d_stdiobase='undef' d_stdstdio='undef' @@ -272,6 +286,7 @@ d_strtod='undef' d_strtol='undef' d_strtold='undef' d_strtoll='undef' +d_strtoq='undef' d_strtoul='undef' d_strtoull='undef' d_strtouq='undef' @@ -295,6 +310,7 @@ d_umask='undef' d_uname='undef' d_union_semun='undef' d_ustat='undef' +d_vendorarch='undef' d_vendorbin='undef' d_vendorlib='undef' d_vfork='undef' @@ -349,6 +365,7 @@ i_float='undef' i_gdbm='undef' i_grp='undef' i_iconv='undef' +i_ieeefp='undef' i_inttypes='undef' i_libutil='undef' i_limits='undef' @@ -364,6 +381,7 @@ i_neterrno='undef' i_netinettcp='undef' i_niin='undef' i_poll='undef' +i_prot='undef' i_pthread='undef' i_pwd='undef' i_rpcsvcdbm='undef' @@ -375,6 +393,7 @@ i_stdarg='define' i_stddef='undef' i_stdlib='undef' i_string='define' +i_sunmath='undef' i_sysaccess='undef' i_sysdir='undef' i_sysfile='undef' @@ -382,6 +401,8 @@ i_sysfilio='undef' i_sysin='undef' i_sysioctl='undef' i_syslog='undef' +i_sysmman='undef' +i_sysmode='undef' i_sysmount='undef' i_sysndir='undef' i_sysparam='undef' @@ -398,6 +419,7 @@ i_systimes='undef' i_systypes='undef' i_sysuio='undef' i_sysun='undef' +i_sysutsname='undef' i_sysvfs='undef' i_syswait='undef' i_termio='undef' @@ -411,6 +433,7 @@ i_varargs='undef' i_varhdr='stdarg.h' i_vfork='undef' ignore_versioned_solibs='y' +inc_version_list_init='NULL' installstyle='lib/perl5' installusrbinperl='undef' intsize='4' @@ -450,8 +473,8 @@ osname='unknown' phostname='hostname' pidtype=int pm_apiversion='5.005' -privlib='/usr/local/lib/perl5/5.6' -privlibexp='/usr/local/lib/perl5/5.6' +privlib='/usr/local/lib/perl5/5.7' +privlibexp='/usr/local/lib/perl5/5.7' prototype='undef' ptrsize=1 quadkind='4' @@ -472,6 +495,7 @@ sPRIi64='"Li"' sPRIo64='"Lo"' sPRIu64='"Lu"' sPRIx64='"Lx"' +sSCNfldbl='"llf"' sched_yield='sched_yield()' scriptdir='/usr/local/bin' scriptdirexp='/usr/local/bin' @@ -484,9 +508,9 @@ sig_count='64' sig_name_init='0' sig_num_init='0' signal_t=int -sizetype=int sizesize=1 -sSCNfldbl='"llf"' +sizetype=int +socksizetype='int' ssizetype=int stdchar=char stdio_base='((fp)->_IO_read_base)' @@ -511,12 +535,12 @@ uidsize='4' uidtype=int uquadtype='uint64_t' use5005threads='undef' -use64bits='undef' +use64bitall='undef' +use64bitint='undef' usedl='undef' useithreads='undef' uselargefiles='undef' uselongdouble='undef' -uselonglong='undef' usemorebits='undef' usemultiplicity='undef' usemymalloc='n' @@ -538,28 +562,3 @@ uvxformat='"lx"' versiononly='undef' voidflags=1 xs_apiversion='5.005' -d_getfsstat='undef' -d_int64_t='undef' -d_lseekproto='undef' -d_madvise='undef' -d_mmap='undef' -use64bitint='undef' -use64bitall='undef' -d_vendorarch='undef' -d_vendorarch='undef' -i_ieeefp='undef' -i_sunmath='undef' -i_sysmode='undef' -i_sysutsname='undef' -d_frexpl='undef' -d_modfl='undef' -d_getespwnam='undef' -d_getprpwnam='undef' -d_isnan='undef' -d_isnanl='undef' -i_prot='undef' -d_perl_otherlibdirs='undef' -inc_version_list_init='NULL' -socksizetype='int' - - diff --git a/utils/h2xs.PL b/utils/h2xs.PL index edc2bb5..88ac482 100644 --- a/utils/h2xs.PL +++ b/utils/h2xs.PL @@ -41,7 +41,7 @@ h2xs - convert .h C header files to Perl extensions =head1 SYNOPSIS -B [B<-ACOPXacdfkmx>] [B<-F> addflags] [B<-M> fmask] [B<-n> module_name] [B<-o> tmask] [B<-p> prefix] [B<-s> subs] [B<-v> version] [headerfile ... [extra_libraries]] +B [B<-ACOPXacdfkmx>] [B<-F> addflags] [B<-M> fmask] [B<-n> module_name] [B<-o> tmask] [B<-p> prefix] [B<-s> subs] [B<-v> version] [B<-b> compat_version] [headerfile ... [extra_libraries]] B B<-h> diff --git a/vms/ext/DCLsym/Makefile.PL b/vms/ext/DCLsym/Makefile.PL index 84ab2be..28e2fa3 100644 --- a/vms/ext/DCLsym/Makefile.PL +++ b/vms/ext/DCLsym/Makefile.PL @@ -1,4 +1,4 @@ use ExtUtils::MakeMaker; WriteMakefile( 'VERSION_FROM' => 'DCLsym.pm', - 'MAN3PODS' => ' '); + 'MAN3PODS' => {}); diff --git a/vms/ext/Stdio/Makefile.PL b/vms/ext/Stdio/Makefile.PL index f5599f8..4e17a48 100644 --- a/vms/ext/Stdio/Makefile.PL +++ b/vms/ext/Stdio/Makefile.PL @@ -1,5 +1,5 @@ use ExtUtils::MakeMaker; WriteMakefile( 'VERSION_FROM' => 'Stdio.pm', - 'MAN3PODS' => ' ', # pods will be built later + 'MAN3PODS' => {}, # pods will be built later ); diff --git a/vms/gen_shrfls.pl b/vms/gen_shrfls.pl index 6c54c10..48499d4 100644 --- a/vms/gen_shrfls.pl +++ b/vms/gen_shrfls.pl @@ -172,7 +172,7 @@ if ($docc) { else { open(CPP,"$cpp_file") or die "$0: Can't read preprocessed file $cpp_file: $!\n"; } -%checkh = map { $_,1 } qw( thread bytecode byterun proto ); +%checkh = map { $_,1 } qw( thread bytecode byterun proto perlio ); $ckfunc = 0; LINE: while () { while (/^#.*vmsish\.h/i .. /^#.*perl\.h/i) { -- 2.7.4