From 8412ca27ca58f428d53d1a04ead1f93d20000056 Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Thu, 21 Jul 2022 11:28:20 +0900 Subject: [PATCH] Imported Upstream version 6.05 --- Changes | 152 +- MANIFEST | 80 +- META.yml | 39 +- Makefile.PL | 60 +- README | 31 +- README.SSL | 29 +- bin/lwp-download | 8 +- bin/lwp-dump | 17 +- bin/lwp-mirror | 10 +- bin/lwp-request | 83 +- bin/lwp-rget | 607 ------- lib/Bundle/LWP.pm | 44 - lib/File/Listing.pm | 412 ----- lib/HTML/Form.pm | 1551 ----------------- lib/HTTP/Config.pm | 436 ----- lib/HTTP/Cookies.pm | 781 --------- lib/HTTP/Cookies/Microsoft.pm | 329 ---- lib/HTTP/Cookies/Netscape.pm | 114 -- lib/HTTP/Daemon.pm | 903 ---------- lib/HTTP/Date.pm | 389 ----- lib/HTTP/Headers.pm | 849 --------- lib/HTTP/Headers/Auth.pm | 98 -- lib/HTTP/Headers/ETag.pm | 94 - lib/HTTP/Headers/Util.pm | 199 --- lib/HTTP/Message.pm | 1102 ------------ lib/HTTP/Negotiate.pm | 529 ------ lib/HTTP/Request.pm | 242 --- lib/HTTP/Request/Common.pm | 511 ------ lib/HTTP/Response.pm | 641 ------- lib/HTTP/Status.pm | 254 --- lib/LWP.pm | 69 +- lib/LWP/Authen/Ntlm.pm | 2 +- lib/LWP/ConnCache.pm | 9 +- lib/LWP/MediaTypes.pm | 298 ---- lib/LWP/Protocol.pm | 2 +- lib/LWP/Protocol/http.pm | 71 +- lib/LWP/Protocol/http10.pm | 289 --- lib/LWP/Protocol/https.pm | 51 - lib/LWP/Protocol/https10.pm | 75 - lib/LWP/RobotUA.pm | 21 +- lib/LWP/Simple.pm | 2 +- lib/LWP/UserAgent.pm | 245 ++- lib/LWP/media.types | 1064 ----------- lib/Net/HTTP.pm | 279 --- lib/Net/HTTP/Methods.pm | 593 ------- lib/Net/HTTP/NB.pm | 105 -- lib/Net/HTTPS.pm | 59 - lib/WWW/RobotRules.pm | 445 ----- lib/WWW/RobotRules/AnyDBM_File.pm | 170 -- lwpcook.pod | 30 +- lwptut.pod | 36 +- t/TEST | 2 +- t/base/common-req.t | 213 --- t/base/cookies.t | 706 -------- t/base/date.t | 180 -- t/base/headers-auth.t | 42 - t/base/headers-etag.t | 29 - t/base/headers-util.t | 45 - t/base/headers.t | 446 ----- t/base/http-config.t | 71 - t/base/http.t | 201 --- t/base/listing.t | 91 - t/base/mediatypes.t | 105 -- t/base/message-charset.t | 127 -- t/base/message-old.t | 97 -- t/base/message-parts.t | 113 -- t/base/message.t | 512 ------ t/base/negotiate.t | 112 -- t/base/request.t | 32 - t/base/response.t | 102 -- t/base/status-old.t | 18 - t/base/status.t | 18 - t/base/ua.t | 62 +- t/html/form-maxlength.t | 60 - t/html/form-multi-select.t | 100 -- t/html/form-param.t | 72 - t/html/form-selector.t | 47 - t/html/form.t | 595 ------- t/live/apache-http10.t | 16 + t/live/apache-listing.t | 27 - t/live/apache.t | 50 - t/live/https.t | 20 - t/live/{jigsaw-auth-b.t => jigsaw/auth-b.t} | 0 t/live/{jigsaw-auth-d.t => jigsaw/auth-d.t} | 0 t/live/{jigsaw-chunk.t => jigsaw/chunk.t} | 0 t/live/{jigsaw-md5-get.t => jigsaw/md5-get.t} | 0 t/live/{jigsaw-md5.t => jigsaw/md5.t} | 0 t/live/{jigsaw-neg-get.t => jigsaw/neg-get.t} | 0 t/live/{jigsaw-neg.t => jigsaw/neg.t} | 0 t/live/{jigsaw-te.t => jigsaw/te.t} | 0 t/live/online.t | 13 + t/local/chunked.t | 184 -- t/local/http.t | 105 +- t/robot/rules-dbm.t | 128 -- t/robot/rules.t | 230 --- t/robot/ua-get.t | 2 + t/robot/ua.t | 2 + 97 files changed, 819 insertions(+), 18665 deletions(-) delete mode 100755 bin/lwp-rget delete mode 100644 lib/Bundle/LWP.pm delete mode 100644 lib/File/Listing.pm delete mode 100644 lib/HTML/Form.pm delete mode 100644 lib/HTTP/Config.pm delete mode 100644 lib/HTTP/Cookies.pm delete mode 100644 lib/HTTP/Cookies/Microsoft.pm delete mode 100644 lib/HTTP/Cookies/Netscape.pm delete mode 100644 lib/HTTP/Daemon.pm delete mode 100644 lib/HTTP/Date.pm delete mode 100644 lib/HTTP/Headers.pm delete mode 100644 lib/HTTP/Headers/Auth.pm delete mode 100644 lib/HTTP/Headers/ETag.pm delete mode 100644 lib/HTTP/Headers/Util.pm delete mode 100644 lib/HTTP/Message.pm delete mode 100644 lib/HTTP/Negotiate.pm delete mode 100644 lib/HTTP/Request.pm delete mode 100644 lib/HTTP/Request/Common.pm delete mode 100644 lib/HTTP/Response.pm delete mode 100644 lib/HTTP/Status.pm delete mode 100644 lib/LWP/MediaTypes.pm delete mode 100644 lib/LWP/Protocol/http10.pm delete mode 100644 lib/LWP/Protocol/https.pm delete mode 100644 lib/LWP/Protocol/https10.pm delete mode 100644 lib/LWP/media.types delete mode 100644 lib/Net/HTTP.pm delete mode 100644 lib/Net/HTTP/Methods.pm delete mode 100644 lib/Net/HTTP/NB.pm delete mode 100644 lib/Net/HTTPS.pm delete mode 100644 lib/WWW/RobotRules.pm delete mode 100644 lib/WWW/RobotRules/AnyDBM_File.pm delete mode 100644 t/base/common-req.t delete mode 100644 t/base/cookies.t delete mode 100644 t/base/date.t delete mode 100644 t/base/headers-auth.t delete mode 100644 t/base/headers-etag.t delete mode 100644 t/base/headers-util.t delete mode 100644 t/base/headers.t delete mode 100644 t/base/http-config.t delete mode 100644 t/base/http.t delete mode 100644 t/base/listing.t delete mode 100644 t/base/mediatypes.t delete mode 100644 t/base/message-charset.t delete mode 100644 t/base/message-old.t delete mode 100644 t/base/message-parts.t delete mode 100644 t/base/message.t delete mode 100644 t/base/negotiate.t delete mode 100644 t/base/request.t delete mode 100644 t/base/response.t delete mode 100644 t/base/status-old.t delete mode 100644 t/base/status.t delete mode 100644 t/html/form-maxlength.t delete mode 100644 t/html/form-multi-select.t delete mode 100644 t/html/form-param.t delete mode 100644 t/html/form-selector.t delete mode 100644 t/html/form.t create mode 100644 t/live/apache-http10.t delete mode 100644 t/live/apache-listing.t delete mode 100644 t/live/apache.t delete mode 100644 t/live/https.t rename t/live/{jigsaw-auth-b.t => jigsaw/auth-b.t} (100%) rename t/live/{jigsaw-auth-d.t => jigsaw/auth-d.t} (100%) rename t/live/{jigsaw-chunk.t => jigsaw/chunk.t} (100%) rename t/live/{jigsaw-md5-get.t => jigsaw/md5-get.t} (100%) rename t/live/{jigsaw-md5.t => jigsaw/md5.t} (100%) rename t/live/{jigsaw-neg-get.t => jigsaw/neg-get.t} (100%) rename t/live/{jigsaw-neg.t => jigsaw/neg.t} (100%) rename t/live/{jigsaw-te.t => jigsaw/te.t} (100%) create mode 100644 t/live/online.t delete mode 100644 t/local/chunked.t delete mode 100644 t/robot/rules-dbm.t delete mode 100644 t/robot/rules.t diff --git a/Changes b/Changes index 91881f0..f79e584 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,151 @@ +_______________________________________________________________________________ +2013-03-11 Release 6.05 + +Karen Etheridge (3): + Derive message from status code if it was not provided + Merge pull request #33 from tomhukins/fix-readme + fix typo in comment + +Ville Skyttä (3): + Spelling fixes. + Spelling fix. + Merge pull request #34 from berekuk/fix-github-path + +Gisle Aas (3): + Update repo URL + With Net::HTTP 6.04 we don't need our own can_read() and sysread override + $ENV{HTTP_PROXY} might override our test setup [RT#81381] + +Vyacheslav Matyukhin (1): + fix github url in perldoc + +Slaven Rezic (1): + * Pod is utf-8 + +Peter Rabbitson (1): + Match required perl in Makefile.PL + +Tom Hukins (1): + Fix Github URLs + + + + +_______________________________________________________________________________ +2012-02-18 Release 6.04 + +Gisle Aas (4): + Typo fix; envirionment [RT#72386] + Implement $ua->is_online test + Add separate option to enable the live jigsaw tests + Merge pull request #10 from trcjr/master + +Theodore Robert Campbell Jr (3): + now with put and delete helpers + updated POD + unit tests for ua->put and ua->delete + +Peter Rabbitson (1): + These modules work with 5.8.1 + + + + +_______________________________________________________________________________ +2011-10-15 Release 6.03 + +Ville Skyttä (7): + Link updates. + Attribute documentation wording improvements. + Don't parse robots.txt response content unless it's textual. + Decode robots.txt response content before attempting to parse it. + RobotUA robots.txt response parsing cleanups. + Don't parse HEAD of robots.txt responses. + Request handler doc grammar fixes. + +Gisle Aas (6): + Pass on HTTP/1.0 if set as request protocol + Remove outdated docs (not touched since 1996 :-) + Merge pull request #22 from madsen/RT67947-verify_hostname + PERL_LWP_ENV_PROXY tweaks + lwp-request didn't respect -H Content-type [RT#70488] + lwp-request -H didn't allow repeated headers + +Christopher J. Madsen (2): + verify_hostname defaults to 0 if ssl_opts provided [RT#67947] + Test verify_hostname setting + +Bryan Cardillo (1): + Fix expect header support to work with content refs. + +Moritz Onken (1): + add PERL_LWP_ENV_PROXY env variable to enable env_proxy globally + + + +_______________________________________________________________________________ +2011-03-27 Release 6.02 + +This is the release where we try to help the CPAN-toolchain be able to install +the modules required for https-support in LWP. We have done this by unbundling +the LWP::Protocol::https module from the libwww-perl distribution. In order to +have https support you now need to install (or depend on) 'LWP::Protocol::https' +and then this will make sure that all the prerequsite modules comes along. +See [RT#66838]. + +This release also removes the old http10 modules that has really been +deprecated since v5.60. These should have been removed at the v6.00 jump, but +I forgot. + + +Christopher J. Madsen (1): + Ignores env variables when ssl_opts provided [RT#66663] + +Gisle Aas (4): + Fix typo; Authen::NTLM [RT#66884] + +Yury Zavarin (1): + Support LWP::ConnCache->new(total_capacity => undef) + + + +_______________________________________________________________________________ +2011-03-09 Release 6.01 + +Add missing HTTP::Daemon dependency for the tests. + + + +_______________________________________________________________________________ +2011-03-08 Release 6.00 + +Unbundled all modules not in the LWP:: namespace from the libwww-perl +distribution. The new broken out CPAN distribtions are File-Listing, +HTML-Form, HTTP-Cookies, HTTP-Daemon, HTTP-Date, HTTP-Message, HTTP-Negotiate, +Net-HTTP, and WWW-RobotRules. libwww-perl-6 require these to be installed. + +This release also drops the unmaintained lwp-rget script from the distribution. + +Perl v5.8.8 or better is now required. For older versions of perl please stay +with libwww-perl-5.837. + +For https://... default to verified connections with require IO::Socket::SSL +and Mozilla::CA modules to be installed. Old behaviour can be requested by +setting the PERL_LWP_SSL_VERIFY_HOSTNAME environment variable to 0. The +LWP::UserAgent got new ssl_opts method to control this as well. + +Support internationalized URLs from command line scripts and in the proxy +environment variables. + +The lwp-dump script got new --request option. + +The lwp-request script got new -E option, contributed by Tony Finch. + +Protocol handlers and callbacks can raise HTTP::Response objects as exceptions. +This will abort the current request and make LWP return the raised response. + + + _______________________________________________________________________________ 2010-09-20 Release 5.837 @@ -183,12 +331,12 @@ phrstbrn (1): _______________________________________________________________________________ 2009-06-25 Release 5.828 -A quick new release to restore compatiblity with perl-5.6. +A quick new release to restore compatibility with perl-5.6. Gisle Aas (4): Less noisy behaviour when we can't download the documents - Restore perl-5.6 compatiblity [RT#47054] + Restore perl-5.6 compatibility [RT#47054] Don't decode US-ASCII and ISO-8859-1 content Some versions of Encode don't support UTF-16-BE [RT#47152] diff --git a/MANIFEST b/MANIFEST index 83eb8a9..a0658ee 100644 --- a/MANIFEST +++ b/MANIFEST @@ -8,27 +8,7 @@ bin/lwp-download Writes bin/lwp-download script bin/lwp-dump Writes bin/lwp-dump script bin/lwp-mirror Writes bin/lwp-mirror script bin/lwp-request Writes bin/lwp-request script -bin/lwp-rget Writes bin/lwp-rget script talk-to-ourself Are we able to run tests talk HTTP to local server? -lib/Bundle/LWP.pm Defines prereq modules -lib/File/Listing.pm Parse directory listings -lib/HTML/Form.pm Parse
...
-lib/HTTP/Config.pm Look up stuff based on request/reponse properties -lib/HTTP/Cookies.pm Cookie storage and management -lib/HTTP/Cookies/Netscape.pm Deal with the Netscape cookie file format -lib/HTTP/Cookies/Microsoft.pm Deal with the Microsoft MSIE cookie file format -lib/HTTP/Daemon.pm A simple httpd -lib/HTTP/Date.pm Date conversion routines -lib/HTTP/Headers.pm Class encapsulating HTTP Message headers -lib/HTTP/Headers/Auth.pm Some methods that deal with authorization. -lib/HTTP/Headers/ETag.pm Some methods that deal with entity tags -lib/HTTP/Headers/Util.pm Some utility functions for header values. -lib/HTTP/Message.pm Class encapsulating HTTP messages -lib/HTTP/Negotiate.pm Evaluate HTTP content negotiation algoritm -lib/HTTP/Request.pm Class encapsulating HTTP Requests -lib/HTTP/Request/Common.pm Generate common requests -lib/HTTP/Response.pm Class encapsulating HTTP Responses -lib/HTTP/Status.pm HTTP Status code processing lib/LWP.pm Includes what you need lib/LWP/Authen/Basic.pm Basic authentication scheme lib/LWP/Authen/Digest.pm Digest authentication scheme @@ -36,7 +16,6 @@ lib/LWP/Authen/Ntlm.pm NTLM authentication (Microsoft) lib/LWP/ConnCache.pm Connection cache lib/LWP/Debug.pm Debugging support lib/LWP/DebugFile.pm Send debug output to a file -lib/LWP/MediaTypes.pm Library for guessing media types lib/LWP/MemberMixin.pm Helps you access %$self lib/LWP/Protocol.pm Virtual base class for LWP protocols lib/LWP/Protocol/GHTTP.pm Alternative HTTP protocol handler @@ -46,9 +25,6 @@ lib/LWP/Protocol/file.pm Access local files lib/LWP/Protocol/ftp.pm Access with the FTP protocol lib/LWP/Protocol/gopher.pm Access with the Gopher protocol lib/LWP/Protocol/http.pm Access with HTTP/1.1 protocol -lib/LWP/Protocol/http10.pm Access with HTTP/1.0 protocol -lib/LWP/Protocol/https.pm Access with HTTP/1.1 protocol over SSL -lib/LWP/Protocol/https10.pm Access with HTTP/1.0 protocol over SSL lib/LWP/Protocol/loopback.pm Returns request (like HTTP TRACE) lib/LWP/Protocol/mailto.pm Allows you to POST mail using sendmail lib/LWP/Protocol/nntp.pm Handles access to news: and nntp: URLs @@ -56,58 +32,24 @@ lib/LWP/Protocol/nogo.pm Denies all requests. lib/LWP/RobotUA.pm Easy creation of conforming robots lib/LWP/Simple.pm Procedural LWP interface lib/LWP/UserAgent.pm A WWW UserAgent class -lib/LWP/media.types Mapping from file extentions media types -lib/Net/HTTP.pm Lower level HTTP on an IO::Socket::INET -lib/Net/HTTP/Methods.pm Lower level HTTP/1.1 protocol -lib/Net/HTTP/NB.pm Non-blocking HTTP -lib/Net/HTTPS.pm Lower level HTTP on an SSL socket -lib/WWW/RobotRules.pm Parse robot.txt files -lib/WWW/RobotRules/AnyDBM_File.pm Persistent robot rules lwpcook.pod Libwww-perl examples lwptut.pod Libwww-perl tutorial t/README How to run and set up tests t/TEST Run tests -t/base/common-req.t Test HTTP::Request::Common module -t/base/cookies.t Test HTTP::Cookies module -t/base/date.t Test HTTP::Date module -t/base/headers-auth.t Test HTTP::Headers::Auth module -t/base/headers-etag.t Test HTTP::Headers::ETag module -t/base/headers-util.t Test HTTP::Headers::Util module -t/base/headers.t Test HTTP::Headers module -t/base/http.t Test Net::HTTP::Methods module -t/base/http-config.t Test HTTP::Config module -t/base/listing.t Test File::Listing module -t/base/mediatypes.t Test LWP::MediaTypes module -t/base/message.t Test HTTP::Message -t/base/message-charset.t Test HTTP::Message content_charset method -t/base/message-old.t Test HTTP::Request/HTTP::Response -t/base/message-parts.t Test HTTP::Message parts method -t/base/negotiate.t Test HTTP::Negotiation module t/base/protocols.t Test protocol methods of LWP::UserAgent -t/base/request.t Test additional HTTP::Request methods -t/base/response.t Test additional HTTP::Response methods -t/base/status.t Test HTTP::Status module -t/base/status-old.t Test HTTP::Status module t/base/ua.t Basic LWP::UserAgent tests -t/html/form.t Test HTML::Form module -t/html/form-param.t More HTML::Form tests. -t/html/form-multi-select.t More HTML::Form tests -t/html/form-maxlength.t More HTML::Form tests -t/html/form-selector.t More HTML::Form tests -t/live/apache.t -t/live/apache-listing.t Test File::Listing::apache package -t/live/https.t -t/live/jigsaw-auth-b.t -t/live/jigsaw-auth-d.t -t/live/jigsaw-chunk.t -t/live/jigsaw-md5-get.t -t/live/jigsaw-md5.t -t/live/jigsaw-neg-get.t -t/live/jigsaw-neg.t -t/live/jigsaw-te.t +t/live/apache-http10.t +t/live/online.t +t/live/jigsaw/auth-b.t +t/live/jigsaw/auth-d.t +t/live/jigsaw/chunk.t +t/live/jigsaw/md5-get.t +t/live/jigsaw/md5.t +t/live/jigsaw/neg-get.t +t/live/jigsaw/neg.t +t/live/jigsaw/te.t t/local/autoload-get.t t/local/autoload.t Test autoloading of LWP::Protocol modules -t/local/chunked.t t/local/get.t Try to get a local file t/local/http.t Test http to local server t/local/protosub.t Test with other protocol module @@ -123,8 +65,6 @@ t/net/http-timeout.t t/net/mirror.t t/net/moved.t t/net/proxy.t -t/robot/rules-dbm.t Test WWW::RobotRules::AnyDBM_File -t/robot/rules.t Test WWW::RobotRules t/robot/ua-get.t t/robot/ua.t Test LWP::RobotUA META.yml Module meta-data (added by MakeMaker) diff --git a/META.yml b/META.yml index 1b4c4fb..bb5c922 100644 --- a/META.yml +++ b/META.yml @@ -1,6 +1,6 @@ --- #YAML:1.0 name: libwww-perl -version: 5.837 +version: 6.05 abstract: The World-Wide Web library for Perl author: - Gisle Aas @@ -11,29 +11,42 @@ configure_requires: build_requires: ExtUtils::MakeMaker: 0 requires: - Compress::Raw::Zlib: 0 Digest::MD5: 0 - HTML::Parser: 3.33 - HTML::Tagset: 0 - IO::Compress::Deflate: 0 - IO::Compress::Gzip: 0 - IO::Uncompress::Gunzip: 0 - IO::Uncompress::Inflate: 0 - IO::Uncompress::RawInflate: 0 + Encode: 2.12 + Encode::Locale: 0 + File::Listing: 6 + HTML::Entities: 0 + HTML::HeadParser: 0 + HTTP::Cookies: 6 + HTTP::Daemon: 6 + HTTP::Date: 6 + HTTP::Negotiate: 6 + HTTP::Request: 6 + HTTP::Request::Common: 6 + HTTP::Response: 6 + HTTP::Status: 6 + IO::Select: 0 + IO::Socket: 0 + LWP::MediaTypes: 6 MIME::Base64: 2.1 Net::FTP: 2.58 - perl: 5.006 + Net::HTTP: 6.04 + perl: 5.008001 URI: 1.10 + URI::Escape: 0 + WWW::RobotRules: 6 resources: MailingList: mailto:libwww@perl.org - repository: http://github.com/gisle/libwww-perl + repository: https://github.com/libwww-perl/libwww-perl no_index: directory: - t - inc -generated_by: ExtUtils::MakeMaker version 6.56 +generated_by: ExtUtils::MakeMaker version 6.57_05 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 recommends: - Crypt::SSLeay: 0 + Authen::NTLM: 1.02 + Data::Dump: 0 + LWP::Protocol::https: 6.02 diff --git a/Makefile.PL b/Makefile.PL index 8f3baf2..add9b30 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -1,6 +1,6 @@ #!perl -w -require 5.006; +require 5.008001; use strict; use ExtUtils::MakeMaker; use Getopt::Long qw(GetOptions); @@ -9,12 +9,13 @@ GetOptions(\my %opt, 'aliases', 'no-programs|n', 'live-tests', + 'jigsaw-tests', ) or do { - die "Usage: $0 [--aliases] [--no-programs] [--live-tests]\n"; + die "Usage: $0 [--aliases] [--no-programs] [--live-tests] [--jigsaw-tests]\n"; }; my @prog; -push(@prog, qw(lwp-request lwp-mirror lwp-rget lwp-download lwp-dump)) +push(@prog, qw(lwp-request lwp-mirror lwp-download lwp-dump)) unless $opt{'no-programs'} || grep /^LIB=/, @ARGV; if ($opt{'aliases'} && grep(/lwp-request/, @prog)) { @@ -29,6 +30,7 @@ if ($opt{'aliases'} && grep(/lwp-request/, @prog)) { system($^X, "talk-to-ourself"); flag_file("t/CAN_TALK_TO_OURSELF", $? == 0); flag_file("t/live/ENABLED", $opt{'live-tests'}); +flag_file("t/live/jigsaw/ENABLED", $opt{'jigsaw-tests'}); WriteMakefile( NAME => 'LWP', @@ -38,40 +40,46 @@ WriteMakefile( AUTHOR => 'Gisle Aas ', EXE_FILES => [ map "bin/$_", @prog ], LICENSE => "perl", - MIN_PERL_VERSION => 5.006, + MIN_PERL_VERSION => 5.008001, PREREQ_PM => { - 'URI' => "1.10", - 'MIME::Base64' => "2.1", - 'Net::FTP' => "2.58", - 'HTML::Tagset' => 0, - 'HTML::Parser' => "3.33", - 'Digest::MD5' => 0, - 'Compress::Raw::Zlib' => 0, - 'IO::Compress::Gzip' => 0, - 'IO::Compress::Deflate' => 0, - 'IO::Uncompress::Gunzip' => 0, - 'IO::Uncompress::Inflate' => 0, - 'IO::Uncompress::RawInflate' => 0, + 'Digest::MD5' => 0, + 'Encode' => "2.12", + 'Encode::Locale' => 0, + 'File::Listing' => 6, + 'HTML::Entities' => 0, + 'HTML::HeadParser' => 0, + 'HTTP::Cookies' => 6, + 'HTTP::Daemon' => 6, + 'HTTP::Date' => 6, + 'HTTP::Negotiate' => 6, + 'HTTP::Request' => 6, + 'HTTP::Request::Common' => 6, + 'HTTP::Response' => 6, + 'HTTP::Status' => 6, + 'IO::Select' => 0, + 'IO::Socket' => 0, + 'LWP::MediaTypes' => 6, + 'MIME::Base64' => "2.1", + 'Net::FTP' => "2.58", + 'Net::HTTP' => "6.04", + 'URI' => "1.10", + 'URI::Escape' => 0, + 'WWW::RobotRules' => 6, }, META_MERGE => { recommends => { - 'Crypt::SSLeay' => 0, + 'LWP::Protocol::https' => '6.02', + 'Authen::NTLM' => "1.02", + 'Data::Dump' => 0, }, resources => { - repository => 'http://github.com/gisle/libwww-perl', + repository => 'https://github.com/libwww-perl/libwww-perl', MailingList => 'mailto:libwww@perl.org', } }, clean => { FILES => join(" ", map "bin/$_", grep /^[A-Z]+$/, @prog) }, ); -if($] >= 5.008 && !(eval { require Encode; defined(Encode::decode("UTF-8", "\xff")) })) { - warn "\nYou lack a working Encode module, and so you will miss out on\n". - "lots of character set goodness from LWP. However, your perl is\n". - "sufficiently recent to support it. It is recommended that you\n". - "install the latest Encode from CPAN.\n\n"; -} - sub MY::test @@ -101,7 +109,7 @@ sub flag_file { BEGIN { # compatibility with older versions of MakeMaker - my $developer = -f "NOTES.txt"; + my $developer = -f ".gitignore"; my %mm_req = ( LICENCE => 6.31, META_MERGE => 6.45, diff --git a/README b/README index 0733b43..82dcc55 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ - L I B W W W - P E R L - 5 + L I B W W W - P E R L - 6 ----------------------------- @@ -14,22 +14,29 @@ help you implement simple HTTP servers. PREREQUISITES In order to install and use this package you will need Perl version -5.6 or better. Some modules within this package depend on other +5.8.1 or better. Some modules within this package depend on other packages that are distributed separately from Perl. We recommend that you have the following packages installed before you install libwww-perl: - URI - MIME-Base64 - HTML-Tagset + Digest-MD5 + Encode-Locale + HTML-Form HTML-Parser + HTML-Tagset + HTTP-Cookies + HTTP-Date + HTTP-Message + HTTP-Negotiate libnet - Digest-MD5 - Compress-Zlib + LWP-MediaTypes + MIME-Base64 + Net-HTTP + URI + WWW-RobotRules If you want to access sites using the https protocol, then you need to -install the Crypt::SSLeay or the IO::Socket::SSL module. The -README.SSL file will tell you more about how libwww-perl supports SSL. +install the LWP::Protocol::https module from CPAN. INSTALLATION @@ -75,16 +82,16 @@ The latest version of libwww-perl is available from CPAN: If you want to hack on the source it might be a good idea to grab the latest version with git using the command: - git clone git://github.com/gisle/libwww-perl.git lwp + git clone git://github.com/libwww-perl/libwww-perl.git lwp You can also browse the git repository at: - http://github.com/gisle/libwww-perl + https://github.com/libwww-perl/libwww-perl COPYRIGHT - © 1995-2009 Gisle Aas. All rights reserved. + © 1995-2010 Gisle Aas. All rights reserved. © 1995 Martijn Koster. All rights reserved. This library is free software; you can redistribute it and/or modify diff --git a/README.SSL b/README.SSL index 66a7b65..3c2202c 100644 --- a/README.SSL +++ b/README.SSL @@ -1,24 +1,7 @@ -SSL SUPPORT ------------ +As of libwww-perl v6.02 you need to install the LWP::Protocol::https module +from its own separate distribution to enable support for https://... URLs for +LWP::UserAgent. -The libwww-perl package has support for using SSL/TLSv1 with its HTTP -client and server classes. This support makes it possible to access -https schemed URLs with LWP. Because of the problematic status of -encryption software in general and certain encryption algorithms in -particular, in several countries, libwww-perl package doesn't include -SSL functionality out-of-the-box. - -Encryption support is obtained through the use of Crypt::SSLeay or -IO::Socket::SSL, which can both be found from CPAN. While libwww-perl -has "plug-and-play" support for both of these modules (as of v5.45), -the recommended module to use is Crypt::SSLeay. In addition to -bringing SSL support to the LWP package, IO::Socket::SSL can be used -as an object oriented interface to SSL encrypted network sockets. - -There is yet another SSL interface for perl called Net::SSLeay. It has -a more complete SSL interface and can be used for web client -programming among other things but doesn't directly support LWP. - -The underlying SSL support in all of these modules is based on OpenSSL - (formerly SSLeay). For WWW-server side SSL -support (e.g. CGI/FCGI scripts) in Apache see . +This makes it possible for that distribution to state the required dependencies +as non-optional. See for +further discussion why we ended up with this solution. diff --git a/bin/lwp-download b/bin/lwp-download index 180a0e0..e951888 100755 --- a/bin/lwp-download +++ b/bin/lwp-download @@ -61,6 +61,8 @@ use LWP::UserAgent (); use LWP::MediaTypes qw(guess_media_type media_suffix); use URI (); use HTTP::Date (); +use Encode; +use Encode::Locale; my $progname = $0; $progname =~ s,.*/,,; # only basename left in progname @@ -74,10 +76,10 @@ unless (getopts('as', \%opt)) { usage(); } -my $url = URI->new(shift || usage()); -my $argfile = shift; +my $url = URI->new(decode(locale => shift) || usage()); +my $argfile = encode(locale_fs => decode(locale => shift)); usage() if defined($argfile) && !length($argfile); -my $VERSION = "5.835"; +my $VERSION = "6.00"; my $ua = LWP::UserAgent->new( agent => "lwp-download/$VERSION ", diff --git a/bin/lwp-dump b/bin/lwp-dump index 1805eb5..5147830 100755 --- a/bin/lwp-dump +++ b/bin/lwp-dump @@ -3,8 +3,10 @@ use strict; use LWP::UserAgent (); use Getopt::Long qw(GetOptions); +use Encode; +use Encode::Locale; -my $VERSION = "5.827"; +my $VERSION = "6.00"; GetOptions(\my %opt, 'parse-head', @@ -12,6 +14,7 @@ GetOptions(\my %opt, 'keep-client-headers', 'method=s', 'agent=s', + 'request', ) || usage(); my $url = shift || usage(); @@ -28,6 +31,7 @@ Recognized options are: --max-length --method --parse-head + --request EOT } @@ -39,12 +43,17 @@ my $ua = LWP::UserAgent->new( agent => $opt{agent} || "lwp-dump/$VERSION ", ); -my $req = HTTP::Request->new($opt{method} || 'GET' => $url); +my $req = HTTP::Request->new($opt{method} || 'GET' => decode(locale => $url)); my $res = $ua->simple_request($req); $res->remove_header(grep /^Client-/, $res->header_field_names) unless $opt{'keep-client-headers'} or ($res->header("Client-Warning") || "") eq "Internal response"; +if ($opt{request}) { + $res->request->dump; + print "\n"; +} + $res->dump(maxlength => $opt{'max-length'}); __END__ @@ -99,6 +108,10 @@ By default B will not try to initialize headers by looking at the head section of HTML documents. This option enables this. This corresponds to L. +=item B<--request> + +Also dump the request sent. + =back =head1 SEE ALSO diff --git a/bin/lwp-mirror b/bin/lwp-mirror index 13da797..b66e54e 100755 --- a/bin/lwp-mirror +++ b/bin/lwp-mirror @@ -13,7 +13,7 @@ lwp-mirror - Simple mirror utility =head1 DESCRIPTION This program can be used to mirror a document from a WWW server. The -document is only transfered if the remote copy is newer than the local +document is only transferred if the remote copy is newer than the local copy. If the local copy is newer nothing happens. Use the C<-v> option to print the version number of this program. @@ -40,12 +40,14 @@ Gisle Aas use LWP::Simple qw(mirror is_success status_message $ua); use Getopt::Std; +use Encode; +use Encode::Locale; $progname = $0; $progname =~ s,.*/,,; # use basename only $progname =~ s/\.\w*$//; #strip extension if any -$VERSION = "5.810"; +$VERSION = "6.05"; $opt_h = undef; # print usage $opt_v = undef; # print version @@ -68,8 +70,8 @@ modify it under the same terms as Perl itself. EOT } -$url = shift or usage(); -$file = shift or usage(); +$url = decode(locale => shift) or usage(); +$file = encode(locale_fs => decode(locale => shift)) or usage(); usage() if $opt_h or @ARGV; if (defined $opt_t) { diff --git a/bin/lwp-request b/bin/lwp-request index ee9dbf8..c948fea 100755 --- a/bin/lwp-request +++ b/bin/lwp-request @@ -116,6 +116,10 @@ requests that are handled by the library. Print response headers. This option is always on for HEAD requests. +=item -E + +Print response status chain with full response headers. + =item -d Do B print the content of the response. @@ -180,13 +184,15 @@ $progname = $0; $progname =~ s,.*[\\/],,; # use basename only $progname =~ s/\.\w*$//; # strip extension, if any -$VERSION = "5.834"; +$VERSION = "6.03"; require LWP; use URI; use URI::Heuristic qw(uf_uri); +use Encode; +use Encode::Locale; use HTTP::Status qw(status_message); use HTTP::Date qw(time2str str2time); @@ -265,11 +271,12 @@ my @getopt_args = ( 'C=s', # credentials for basic authorization 'H=s@', # extra headers, form "Header: value string" # - 'u', # display method, URL and headers of request + 'u', # display method and URL of request 'U', # display request headers also 's', # display status code 'S', # display whole chain of status codes 'e', # display response headers (default for HEAD) + 'E', # display whole chain of headers 'd', # don't display content # 'h', # print usage @@ -320,12 +327,23 @@ elsif (!defined $allowed_methods{$method}) { die "$progname: $method is not an allowed method\n"; } +if ($options{'S'} || $options{'E'}) { + $options{'U'} = 1 if $options{'E'}; + $options{'E'} = 1 if $options{'e'}; + $options{'S'} = 1; + $options{'s'} = 1; + $options{'u'} = 1; +} + if ($method eq "HEAD") { $options{'s'} = 1; $options{'e'} = 1 unless $options{'d'}; $options{'d'} = 1; } +$options{'u'} = 1 if $options{'U'}; +$options{'s'} = 1 if $options{'e'}; + if (defined $options{'t'}) { $options{'t'} =~ /^(\d+)([smh])?/; die "$progname: Illegal timeout value!\n" unless defined $1; @@ -350,6 +368,7 @@ if (defined $options{'i'}) { } $content = undef; +$user_ct = undef; if ($allowed_methods{$method} eq "C") { # This request needs some content unless (defined $options{'c'}) { @@ -360,7 +379,8 @@ if ($allowed_methods{$method} eq "C") { } else { die "$progname: Illegal Content-type format\n" - unless $options{'c'} =~ m,^[\w\-]+/[\w\-.+]+(?:\s*;.*)?$, + unless $options{'c'} =~ m,^[\w\-]+/[\w\-.+]+(?:\s*;.*)?$,; + $user_ct++; } print STDERR "Please enter content ($options{'c'}) to be ${method}ed:\n" if -t; @@ -376,26 +396,42 @@ else { $request = HTTP::Request->new($method); $request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'}; for my $user_header (@{ $options{'H'} || [] }) { - my ($header_name, $header_value) = split /:\s*/, $user_header, 2; - $request->header($header_name, $header_value); - $ua->agent($header_value) if lc($header_name) eq "user-agent"; # Ugh! + my ($header_name, $header_value) = split /\s*:\s*/, $user_header, 2; + $header_name =~ s/^\s+//; + if (lc($header_name) eq "user-agent") { + $header_value .= $ua->agent if $header_value =~ /\s\z/; + $ua->agent($header_value); + } + else { + $request->push_header($header_name, $header_value); + } } #$request->header('Accept', '*/*'); if ($options{'c'}) { # will always be set for request that wants content - $request->header('Content-Type', $options{'c'}); + my $header = ($user_ct ? 'header' : 'init_header'); + $request->$header('Content-Type', $options{'c'}); $request->header('Content-Length', length $content); # Not really needed $request->content($content); } $errors = 0; +sub show { + my $r = shift; + my $last = shift; + print $method, " ", $r->request->uri->as_string, "\n" if $options{'u'}; + print $r->request->headers_as_string, "\n" if $options{'U'}; + print $r->status_line, "\n" if $options{'s'}; + print $r->headers_as_string, "\n" if $options{'E'} or $last; +} + # Ok, now we perform the requests, one URL at a time while ($url = shift) { # Create the URL object, but protect us against bad URLs eval { if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification - $url = URI->new($url, $options{'b'}); - $url = $url->abs($options{'b'}) if $options{'b'}; + $url = URI->new(decode(locale => $url), decode(locale => $options{'b'})); + $url = $url->abs(decode(locale => $options{'b'})) if $options{'b'}; } else { $url = uf_uri($url); @@ -408,34 +444,18 @@ while ($url = shift) { next; } - $ua->proxy($url->scheme, $options{'p'}) if $options{'p'}; + $ua->proxy($url->scheme, decode(locale => $options{'p'})) if $options{'p'}; # Send the request and get a response back from the server $request->uri($url); $response = $ua->request($request); - if ($options{'u'} || $options{'U'}) { - my $url = $response->request->uri->as_string; - print "$method $url\n"; - print $response->request->headers_as_string, "\n" if $options{'U'}; - } - if ($options{'S'}) { - for my $r ($response->redirects, $response) { - my $method = $r->request->method; - my $url = $r->request->uri->as_string; - print "$method $url --> ", $r->status_line, "\n"; + for my $r ($response->redirects) { + show($r); } } - elsif ($options{'s'}) { - print $response->status_line, "\n"; - } - - if ($options{'e'}) { - # Display headers - print $response->headers_as_string; - print "\n"; # separate headers and content - } + show($response, $options{'e'}); unless ($options{'d'}) { if ($options{'o'} && @@ -520,8 +540,9 @@ Usage: $progname [-options] ... -u Display method and URL before any response -U Display request headers (implies -u) -s Display response status code - -S Display response status chain - -e Display response headers + -S Display response status chain (implies -u) + -e Display response headers (implies -s) + -E Display whole chain of headers (implies -S and -U) -d Do not display content -o Process HTML content in various ways diff --git a/bin/lwp-rget b/bin/lwp-rget deleted file mode 100755 index 2ac798f..0000000 --- a/bin/lwp-rget +++ /dev/null @@ -1,607 +0,0 @@ -#!/usr/bin/perl -w - -=head1 NAME - -lwp-rget - Retrieve web documents recursively - -=head1 SYNOPSIS - - lwp-rget [--verbose] [--auth=USER:PASS] [--depth=N] [--hier] [--iis] - [--keepext=mime/type[,mime/type]] [--limit=N] [--nospace] - [--prefix=URL] [--referer=URL] [--sleep=N] [--tolower] - lwp-rget --version - -=head1 DESCRIPTION - -This program will retrieve a document and store it in a local file. It -will follow any links found in the document and store these documents -as well, patching links so that they refer to these local copies. -This process continues until there are no more unvisited links or the -process is stopped by the one or more of the limits which can be -controlled by the command line arguments. - -This program is useful if you want to make a local copy of a -collection of documents or want to do web reading off-line. - -All documents are stored as plain files in the current directory. The -file names chosen are derived from the last component of URL paths. - -The options are: - -=over 3 - -=item --auth=USER:PASS - -Set the authentication credentials to user "USER" and password "PASS" if -any restricted parts of the web site are hit. If there are restricted -parts of the web site and authentication credentials are not available, -those pages will not be downloaded. - -=item --depth=I - -Limit the recursive level. Embedded images are always loaded, even if -they fall outside the I<--depth>. This means that one can use -I<--depth=0> in order to fetch a single document together with all -inline graphics. - -The default depth is 5. - -=item --hier - -Download files into a hierarchy that mimics the web site structure. -The default is to put all files in the current directory. - -=item --referer=I - -Set the value of the Referer header for the initial request. The -special value C<"NONE"> can be used to suppress the Referer header in -any of subsequent requests. The Referer header will always be suppressed -in all normal C requests if the referring page was transmitted over -C as recommended in RFC 2616. - -=item --iis - -Sends an "Accept: */*" on all URL requests as a workaround for a bug in -IIS 2.0. If no Accept MIME header is present, IIS 2.0 returns with a -"406 No acceptable objects were found" error. Also converts any back -slashes (\\) in URLs to forward slashes (/). - -=item --keepext=I - -Keeps the current extension for the list MIME types. Useful when -downloading text/plain documents that shouldn't all be translated to -*.txt files. - -=item --limit=I - -Limit the number of documents to get. The default limit is 50. - -=item --nospace - -Changes spaces in all URLs to underscore characters (_). Useful when -downloading files from sites serving URLs with spaces in them. Does not -remove spaces from fragments, e.g., "file.html#somewhere in here". - -=item --prefix=I - -Limit the links to follow. Only URLs that start the prefix string are -followed. - -The default prefix is set as the "directory" of the initial URL to -follow. For instance if we start lwp-rget with the URL -C, then prefix will be set to -C. - -Use C<--prefix=''> if you don't want the fetching to be limited by any -prefix. - -=item --sleep=I - -Sleep I seconds before retrieving each document. This options allows -you to go slowly, not loading the server you visiting too much. - -=item --tolower - -Translates all links to lowercase. Useful when downloading files from -IIS since it does not serve files in a case sensitive manner. - -=item --verbose - -Make more noise while running. - -=item --quiet - -Don't make any noise. - -=item --version - -Print program version number and quit. - -=item --help - -Print the usage message and quit. - -=back - -Before the program exits the name of the file, where the initial URL -is stored, is printed on stdout. All used filenames are also printed -on stderr as they are loaded. This printing can be suppressed with -the I<--quiet> option. - -=head1 SEE ALSO - -L, L - -=head1 AUTHOR - -Gisle Aas - -=cut - -use strict; - -use Getopt::Long qw(GetOptions); -use URI::URL qw(url); -use LWP::MediaTypes qw(media_suffix); -use HTML::Entities (); - -use vars qw($VERSION); -use vars qw($MAX_DEPTH $MAX_DOCS $PREFIX $REFERER $VERBOSE $QUIET $SLEEP $HIER $AUTH $IIS $TOLOWER $NOSPACE %KEEPEXT); - -my $progname = $0; -$progname =~ s|.*/||; # only basename left -$progname =~ s/\.\w*$//; #strip extension if any - -$VERSION = "5.827"; - -#$Getopt::Long::debug = 1; -#$Getopt::Long::ignorecase = 0; - -# Defaults -$MAX_DEPTH = 5; -$MAX_DOCS = 50; - -GetOptions('version' => \&print_version, - 'help' => \&usage, - 'depth=i' => \$MAX_DEPTH, - 'limit=i' => \$MAX_DOCS, - 'verbose!' => \$VERBOSE, - 'quiet!' => \$QUIET, - 'sleep=i' => \$SLEEP, - 'prefix:s' => \$PREFIX, - 'referer:s'=> \$REFERER, - 'hier' => \$HIER, - 'auth=s' => \$AUTH, - 'iis' => \$IIS, - 'tolower' => \$TOLOWER, - 'nospace' => \$NOSPACE, - 'keepext=s' => \$KEEPEXT{'OPT'}, - ) || usage(); - -sub print_version { - require LWP; - my $DISTNAME = 'libwww-perl-' . LWP::Version(); - print <<"EOT"; -This is lwp-rget version $VERSION ($DISTNAME) - -Copyright 1996-1998, Gisle Aas. - -This program is free software; you can redistribute it and/or -modify it under the same terms as Perl itself. -EOT - exit 0; -} - -my $start_url = shift || usage(); -usage() if @ARGV; - -require LWP::UserAgent; -my $ua = new LWP::UserAgent; -$ua->agent("$progname/$VERSION "); -$ua->env_proxy; - -unless (defined $PREFIX) { - $PREFIX = url($start_url); # limit to URLs below this one - eval { - $PREFIX->eparams(undef); - $PREFIX->equery(undef); - }; - - $_ = $PREFIX->epath; - s|[^/]+$||; - $PREFIX->epath($_); - $PREFIX = $PREFIX->as_string; -} - -%KEEPEXT = map { lc($_) => 1 } split(/\s*,\s*/, ($KEEPEXT{'OPT'}||0)); - -my $SUPPRESS_REFERER; -$SUPPRESS_REFERER++ if ($REFERER || "") eq "NONE"; - -print <<"" if $VERBOSE; -START = $start_url -MAX_DEPTH = $MAX_DEPTH -MAX_DOCS = $MAX_DOCS -PREFIX = $PREFIX - -my $no_docs = 0; -my %seen = (); # mapping from URL => local_file - -my $filename = fetch($start_url, undef, $REFERER); -print "$filename\n" unless $QUIET; - -sub fetch -{ - my($url, $type, $referer, $depth) = @_; - - # Fix http://sitename.com/../blah/blah.html to - # http://sitename.com/blah/blah.html - $url = $url->as_string if (ref($url)); - while ($url =~ s#(https?://[^/]+/)\.\.\/#$1#) {} - - # Fix backslashes (\) in URL if $IIS defined - $url = fix_backslashes($url) if (defined $IIS); - - $url = url($url); - $type ||= 'a'; - # Might be the background attribute - $type = 'img' if ($type eq 'body' || $type eq 'td'); - $depth ||= 0; - - # Print the URL before we start checking... - my $out = (" " x $depth) . $url . " "; - $out .= "." x (60 - length($out)); - print STDERR $out . " " if $VERBOSE; - - # Can't get mailto things - if ($url->scheme eq 'mailto') { - print STDERR "*skipping mailto*\n" if $VERBOSE; - return $url->as_string; - } - - # The $plain_url is a URL without the fragment part - my $plain_url = $url->clone; - $plain_url->frag(undef); - - # Check PREFIX, but not for links - if ($type ne 'img' and $url->as_string !~ /^\Q$PREFIX/o) { - print STDERR "*outsider*\n" if $VERBOSE; - return $url->as_string; - } - - # Translate URL to lowercase if $TOLOWER defined - $plain_url = to_lower($plain_url) if (defined $TOLOWER); - - # If we already have it, then there is nothing to be done - my $seen = $seen{$plain_url->as_string}; - if ($seen) { - my $frag = $url->frag; - $seen .= "#$frag" if defined($frag); - $seen = protect_frag_spaces($seen); - print STDERR "$seen (again)\n" if $VERBOSE; - return $seen; - } - - # Too much or too deep - if ($depth > $MAX_DEPTH and $type ne 'img') { - print STDERR "*too deep*\n" if $VERBOSE; - return $url; - } - if ($no_docs > $MAX_DOCS) { - print STDERR "*too many*\n" if $VERBOSE; - return $url; - } - - # Fetch document - $no_docs++; - sleep($SLEEP) if $SLEEP; - my $req = HTTP::Request->new(GET => $url); - # See: http://ftp.sunet.se/pub/NT/mirror-microsoft/kb/Q163/7/74.TXT - $req->header ('Accept', '*/*') if (defined $IIS); # GIF/JPG from IIS 2.0 - $req->authorization_basic(split (/:/, $AUTH)) if (defined $AUTH); - if ($referer && !$SUPPRESS_REFERER) { - if ($req->uri->scheme eq 'http') { - # RFC 2616, section 15.1.3 - $referer = url($referer) unless ref($referer); - undef $referer if ($referer->scheme || '') eq 'https'; - } - $req->referer($referer) if $referer; - } - my $res = $ua->request($req); - - # Check outcome - if ($res->is_success) { - my $doc = $res->content; - my $ct = $res->content_type; - my $name = find_name($res->request->uri, $ct); - print STDERR "$name\n" unless $QUIET; - $seen{$plain_url->as_string} = $name; - - # If the file is HTML, then we look for internal links - if ($ct eq "text/html") { - # Save an unprosessed version of the HTML document. This - # both reserves the name used, and it also ensures that we - # don't loose everything if this program is killed before - # we finish. - save($name, $doc); - my $base = $res->base; - - # Follow and substitute links... - $doc =~ -s/ - ( - <(img|a|body|area|frame|td)\b # some interesting tag - [^>]+ # still inside tag (not strictly correct) - \b(?:src|href|background) # some link attribute - \s*=\s* # = - ) - (?: # scope of OR-ing - (")([^"]*)" | # value in double quotes OR - (')([^']*)' | # value in single quotes OR - ([^\s>]+) # quoteless value - ) -/ - new_link($1, lc($2), $3||$5, HTML::Entities::decode($4||$6||$7), - $base, $name, "$url", $depth+1) -/giex; - # XXX - # The regular expression above is not strictly correct. - # It is not really possible to parse HTML with a single - # regular expression, but it is faster. Tags that might - # confuse us include: - # - # - # - } - save($name, $doc); - return $name; - } - else { - print STDERR $res->code . " " . $res->message . "\n" if $VERBOSE; - $seen{$plain_url->as_string} = $url->as_string; - return $url->as_string; - } -} - -sub new_link -{ - my($pre, $type, $quote, $url, $base, $localbase, $referer, $depth) = @_; - - $url = protect_frag_spaces($url); - - $url = fetch(url($url, $base)->abs, $type, $referer, $depth); - $url = url("file:$url", "file:$localbase")->rel - unless $url =~ /^[.+\-\w]+:/; - - $url = unprotect_frag_spaces($url); - - return $pre . $quote . $url . $quote; -} - - -sub protect_frag_spaces -{ - my ($url) = @_; - - $url = $url->as_string if (ref($url)); - - if ($url =~ m/^([^#]*#)(.+)$/) - { - my ($base, $frag) = ($1, $2); - $frag =~ s/ /%20/g; - $url = $base . $frag; - } - - return $url; -} - - -sub unprotect_frag_spaces -{ - my ($url) = @_; - - $url = $url->as_string if (ref($url)); - - if ($url =~ m/^([^#]*#)(.+)$/) - { - my ($base, $frag) = ($1, $2); - $frag =~ s/%20/ /g; - $url = $base . $frag; - } - - return $url; -} - - -sub fix_backslashes -{ - my ($url) = @_; - my ($base, $frag); - - $url = $url->as_string if (ref($url)); - - if ($url =~ m/([^#]+)(#.*)/) - { - ($base, $frag) = ($1, $2); - } - else - { - $base = $url; - $frag = ""; - } - - $base =~ tr/\\/\//; - $base =~ s/%5[cC]/\//g; # URL-encoded back slash is %5C - - return $base . $frag; -} - - -sub to_lower -{ - my ($url) = @_; - my $was_object = 0; - - if (ref($url)) - { - $url = $url->as_string; - $was_object = 1; - } - - if ($url =~ m/([^#]+)(#.*)/) - { - $url = lc($1) . $2; - } - else - { - $url = lc($url); - } - - if ($was_object == 1) - { - return url($url); - } - else - { - return $url; - } -} - - -sub translate_spaces -{ - my ($url) = @_; - my ($base, $frag); - - $url = $url->as_string if (ref($url)); - - if ($url =~ m/([^#]+)(#.*)/) - { - ($base, $frag) = ($1, $2); - } - else - { - $base = $url; - $frag = ""; - } - - $base =~ s/^ *//; # Remove initial spaces from base - $base =~ s/ *$//; # Remove trailing spaces from base - - $base =~ tr/ /_/; - $base =~ s/%20/_/g; # URL-encoded space is %20 - - return $base . $frag; -} - - -sub mkdirp -{ - my($directory, $mode) = @_; - my @dirs = split(/\//, $directory); - my $path = shift(@dirs); # build it as we go - my $result = 1; # assume it will work - - unless (-d $path) { - $result &&= mkdir($path, $mode); - } - - foreach (@dirs) { - $path .= "/$_"; - if ( ! -d $path) { - $result &&= mkdir($path, $mode); - } - } - - return $result; -} - - -sub find_name -{ - my($url, $type) = @_; - #print "find_name($url, $type)\n"; - - # Translate spaces in URL to underscores (_) if $NOSPACE defined - $url = translate_spaces($url) if (defined $NOSPACE); - - # Translate URL to lowercase if $TOLOWER defined - $url = to_lower($url) if (defined $TOLOWER); - - $url = url($url) unless ref($url); - - my $path = $url->path; - - # trim path until only the basename is left - $path =~ s|(.*/)||; - my $dirname = ".$1"; - if (!$HIER) { - $dirname = ""; - } - elsif (! -d $dirname) { - mkdirp($dirname, 0775); - } - - my $extra = ""; # something to make the name unique - my $suffix; - - if ($KEEPEXT{lc($type)}) { - $suffix = ($path =~ m/\.(.*)/) ? $1 : ""; - } - else { - $suffix = media_suffix($type); - } - - $path =~ s|\..*||; # trim suffix - $path = "index" unless length $path; - - while (1) { - # Construct a new file name - my $file = $dirname . $path . $extra; - $file .= ".$suffix" if $suffix; - # Check if it is unique - return $file unless -f $file; - - # Try something extra - unless ($extra) { - $extra = "001"; - next; - } - $extra++; - } -} - - -sub save -{ - my $name = shift; - #print "save($name,...)\n"; - open(FILE, ">$name") || die "Can't save $name: $!"; - binmode FILE; - print FILE $_[0]; - close(FILE); -} - - -sub usage -{ - print <<""; exit 1; -Usage: $progname [options] -Allowed options are: - --auth=USER:PASS Set authentication credentials for web site - --depth=N Maximum depth to traverse (default is $MAX_DEPTH) - --hier Download into hierarchy (not all files into cwd) - --referer=URI Set initial referer header (or "NONE") - --iis Workaround IIS 2.0 bug by sending "Accept: */*" MIME - header; translates backslashes (\\) to forward slashes (/) - --keepext=type Keep file extension for MIME types (comma-separated list) - --limit=N A limit on the number documents to get (default is $MAX_DOCS) - --nospace Translate spaces URLs (not #fragments) to underscores (_) - --version Print version number and quit - --verbose More output - --quiet No output - --sleep=SECS Sleep between gets, ie. go slowly - --prefix=PREFIX Limit URLs to follow to those which begin with PREFIX - --tolower Translate all URLs to lowercase (useful with IIS servers) - -} diff --git a/lib/Bundle/LWP.pm b/lib/Bundle/LWP.pm deleted file mode 100644 index 1f2f045..0000000 --- a/lib/Bundle/LWP.pm +++ /dev/null @@ -1,44 +0,0 @@ -package Bundle::LWP; - -$VERSION = "5.835"; - -1; - -__END__ - -=head1 NAME - -Bundle::LWP - install all libwww-perl related modules - -=head1 SYNOPSIS - - perl -MCPAN -e 'install Bundle::LWP' - -=head1 CONTENTS - -MIME::Base64 - Used in authentication headers - -Digest::MD5 - Needed to do Digest authentication - -URI 1.10 - There are URIs everywhere - -Net::FTP 2.58 - If you want ftp://-support - -HTML::Tagset - Needed by HTML::Parser - -HTML::Parser - Needed by HTML::HeadParser - -HTML::HeadParser - To get the correct $res->base - -LWP - The reason why you need the modules above - -=head1 DESCRIPTION - -This bundle defines all prerequisite modules for libwww-perl. Bundles -have special meaning for the CPAN module. When you install the bundle -module all modules mentioned in L will be installed -instead. - -=head1 SEE ALSO - -L diff --git a/lib/File/Listing.pm b/lib/File/Listing.pm deleted file mode 100644 index 53a6ddc..0000000 --- a/lib/File/Listing.pm +++ /dev/null @@ -1,412 +0,0 @@ -package File::Listing; - -sub Version { $VERSION; } -$VERSION = "5.837"; - -require Exporter; -@ISA = qw(Exporter); -@EXPORT = qw(parse_dir); - -use strict; - -use Carp (); -use HTTP::Date qw(str2time); - - - -sub parse_dir ($;$$$) -{ - my($dir, $tz, $fstype, $error) = @_; - - $fstype ||= 'unix'; - $fstype = "File::Listing::" . lc $fstype; - - my @args = $_[0]; - push(@args, $tz) if(@_ >= 2); - push(@args, $error) if(@_ >= 4); - - $fstype->parse(@args); -} - - -sub line { Carp::croak("Not implemented yet"); } -sub init { } # Dummy sub - - -sub file_mode ($) -{ - # This routine was originally borrowed from Graham Barr's - # Net::FTP package. - - local $_ = shift; - my $mode = 0; - my($type,$ch); - - s/^(.)// and $type = $1; - - while (/(.)/g) { - $mode <<= 1; - $mode |= 1 if $1 ne "-" && - $1 ne 'S' && - $1 ne 't' && - $1 ne 'T'; - } - - $type eq "d" and $mode |= 0040000 or # Directory - $type eq "l" and $mode |= 0120000 or # Symbolic Link - $mode |= 0100000; # Regular File - - $mode |= 0004000 if /^...s....../i; - $mode |= 0002000 if /^......s.../i; - $mode |= 0001000 if /^.........t/i; - - $mode; -} - - -sub parse -{ - my($pkg, $dir, $tz, $error) = @_; - - # First let's try to determine what kind of dir parameter we have - # received. We allow both listings, reference to arrays and - # file handles to read from. - - if (ref($dir) eq 'ARRAY') { - # Already splitted up - } - elsif (ref($dir) eq 'GLOB') { - # A file handle - } - elsif (ref($dir)) { - Carp::croak("Illegal argument to parse_dir()"); - } - elsif ($dir =~ /^\*\w+(::\w+)+$/) { - # This scalar looks like a file handle, so we assume it is - } - else { - # A normal scalar listing - $dir = [ split(/\n/, $dir) ]; - } - - $pkg->init(); - - my @files = (); - if (ref($dir) eq 'ARRAY') { - for (@$dir) { - push(@files, $pkg->line($_, $tz, $error)); - } - } - else { - local($_); - while (<$dir>) { - chomp; - push(@files, $pkg->line($_, $tz, $error)); - } - } - wantarray ? @files : \@files; -} - - - -package File::Listing::unix; - -use HTTP::Date qw(str2time); - -# A place to remember current directory from last line parsed. -use vars qw($curdir @ISA); - -@ISA = qw(File::Listing); - - - -sub init -{ - $curdir = ''; -} - - -sub line -{ - shift; # package name - local($_) = shift; - my($tz, $error) = @_; - - s/\015//g; - #study; - - my ($kind, $size, $date, $name); - if (($kind, $size, $date, $name) = - /^([\-FlrwxsStTdD]{10}) # Type and permission bits - .* # Graps - \D(\d+) # File size - \s+ # Some space - (\w{3}\s+\d+\s+(?:\d{1,2}:\d{2}|\d{4})|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}) # Date - \s+ # Some more space - (.*)$ # File name - /x ) - - { - return if $name eq '.' || $name eq '..'; - $name = "$curdir/$name" if length $curdir; - my $type = '?'; - if ($kind =~ /^l/ && $name =~ /(.*) -> (.*)/ ) { - $name = $1; - $type = "l $2"; - } - elsif ($kind =~ /^[\-F]/) { # (hopefully) a regular file - $type = 'f'; - } - elsif ($kind =~ /^[dD]/) { - $type = 'd'; - $size = undef; # Don't believe the reported size - } - return [$name, $type, $size, str2time($date, $tz), - File::Listing::file_mode($kind)]; - - } - elsif (/^(.+):$/ && !/^[dcbsp].*\s.*\s.*:$/ ) { - my $dir = $1; - return () if $dir eq '.'; - $curdir = $dir; - return (); - } - elsif (/^[Tt]otal\s+(\d+)$/ || /^\s*$/) { - return (); - } - elsif (/not found/ || # OSF1, HPUX, and SunOS return - # "$file not found" - /No such file/ || # IRIX returns - # "UX:ls: ERROR: Cannot access $file: No such file or directory" - # Solaris returns - # "$file: No such file or directory" - /cannot find/ # Windows NT returns - # "The system cannot find the path specified." - ) { - return () unless defined $error; - &$error($_) if ref($error) eq 'CODE'; - warn "Error: $_\n" if $error eq 'warn'; - return (); - } - elsif ($_ eq '') { # AIX, and Linux return nothing - return () unless defined $error; - &$error("No such file or directory") if ref($error) eq 'CODE'; - warn "Warning: No such file or directory\n" if $error eq 'warn'; - return (); - } - else { - # parse failed, check if the dosftp parse understands it - File::Listing::dosftp->init(); - return(File::Listing::dosftp->line($_,$tz,$error)); - } - -} - - - -package File::Listing::dosftp; - -use HTTP::Date qw(str2time); - -# A place to remember current directory from last line parsed. -use vars qw($curdir @ISA); - -@ISA = qw(File::Listing); - - - -sub init -{ - $curdir = ''; -} - - -sub line -{ - shift; # package name - local($_) = shift; - my($tz, $error) = @_; - - s/\015//g; - - my ($date, $size_or_dir, $name, $size); - - # 02-05-96 10:48AM 1415 src.slf - # 09-10-96 09:18AM sl_util - if (($date, $size_or_dir, $name) = - /^(\d\d-\d\d-\d\d\s+\d\d:\d\d\wM) # Date and time info - \s+ # Some space - (<\w{3}>|\d+) # Dir or Size - \s+ # Some more space - (.+)$ # File name - /x ) - { - return if $name eq '.' || $name eq '..'; - $name = "$curdir/$name" if length $curdir; - my $type = '?'; - if ($size_or_dir eq '') { - $type = "d"; - $size = ""; # directories have no size in the pc listing - } - else { - $type = 'f'; - $size = $size_or_dir; - } - return [$name, $type, $size, str2time($date, $tz), undef]; - } - else { - return () unless defined $error; - &$error($_) if ref($error) eq 'CODE'; - warn "Can't parse: $_\n" if $error eq 'warn'; - return (); - } - -} - - - -package File::Listing::vms; -@File::Listing::vms::ISA = qw(File::Listing); - -package File::Listing::netware; -@File::Listing::netware::ISA = qw(File::Listing); - - - -package File::Listing::apache; - -use vars qw(@ISA); - -@ISA = qw(File::Listing); - - -sub init { } - - -sub line { - shift; # package name - local($_) = shift; - my($tz, $error) = @_; # ignored for now... - - if (m!.*.*?(\d+)-([a-zA-Z]+|\d+)-(\d+)\s+(\d+):(\d+)\s+(?:([\d\.]+[kMG]?|-))!i) { - my($filename, $filesize) = ($1, $7); - my($d,$m,$y, $H,$M) = ($2,$3,$4,$5,$6); - if ($m =~ /^\d+$/) { - ($d,$y) = ($y,$d) # iso date - } - else { - $m = _monthabbrev_number($m); - } - - $filesize = 0 if $filesize eq '-'; - if ($filesize =~ s/k$//i) { - $filesize *= 1024; - } - elsif ($filesize =~ s/M$//) { - $filesize *= 1024*1024; - } - elsif ($filesize =~ s/G$//) { - $filesize *= 1024*1024*1024; - } - $filesize = int $filesize; - - require Time::Local; - my $filetime = Time::Local::timelocal(0,$M,$H,$d,$m-1,_guess_year($y)-1900); - my $filetype = ($filename =~ s|/$|| ? "d" : "f"); - return [$filename, $filetype, $filesize, $filetime, undef]; - } - - return (); -} - - -sub _guess_year { - my $y = shift; - if ($y >= 90) { - $y = 1900+$y; - } - elsif ($y < 100) { - $y = 2000+$y; - } - $y; -} - - -sub _monthabbrev_number { - my $mon = shift; - +{'Jan' => 1, - 'Feb' => 2, - 'Mar' => 3, - 'Apr' => 4, - 'May' => 5, - 'Jun' => 6, - 'Jul' => 7, - 'Aug' => 8, - 'Sep' => 9, - 'Oct' => 10, - 'Nov' => 11, - 'Dec' => 12, - }->{$mon}; -} - - -1; - -__END__ - -=head1 NAME - -File::Listing - parse directory listing - -=head1 SYNOPSIS - - use File::Listing qw(parse_dir); - $ENV{LANG} = "C"; # dates in non-English locales not supported - for (parse_dir(`ls -l`)) { - ($name, $type, $size, $mtime, $mode) = @$_; - next if $type ne 'f'; # plain file - #... - } - - # directory listing can also be read from a file - open(LISTING, "zcat ls-lR.gz|"); - $dir = parse_dir(\*LISTING, '+0000'); - -=head1 DESCRIPTION - -This module exports a single function called parse_dir(), which can be -used to parse directory listings. - -The first parameter to parse_dir() is the directory listing to parse. -It can be a scalar, a reference to an array of directory lines or a -glob representing a filehandle to read the directory listing from. - -The second parameter is the time zone to use when parsing time stamps -in the listing. If this value is undefined, then the local time zone is -assumed. - -The third parameter is the type of listing to assume. Currently -supported formats are 'unix', 'apache' and 'dosftp'. The default -value 'unix'. Ideally, the listing type should be determined -automatically. - -The fourth parameter specifies how unparseable lines should be treated. -Values can be 'ignore', 'warn' or a code reference. Warn means that -the perl warn() function will be called. If a code reference is -passed, then this routine will be called and the return value from it -will be incorporated in the listing. The default is 'ignore'. - -Only the first parameter is mandatory. - -The return value from parse_dir() is a list of directory entries. In -a scalar context the return value is a reference to the list. The -directory entries are represented by an array consisting of [ -$filename, $filetype, $filesize, $filetime, $filemode ]. The -$filetype value is one of the letters 'f', 'd', 'l' or '?'. The -$filetime value is the seconds since Jan 1, 1970. The -$filemode is a bitmask like the mode returned by stat(). - -=head1 CREDITS - -Based on lsparse.pl (from Lee McLoughlin's ftp mirror package) and -Net::FTP's parse_dir (Graham Barr). diff --git a/lib/HTML/Form.pm b/lib/HTML/Form.pm deleted file mode 100644 index bbbd777..0000000 --- a/lib/HTML/Form.pm +++ /dev/null @@ -1,1551 +0,0 @@ -package HTML::Form; - -use strict; -use URI; -use Carp (); - -use vars qw($VERSION $Encode_available); -$VERSION = "5.829"; - -eval { require Encode }; -$Encode_available = !$@; - -my %form_tags = map {$_ => 1} qw(input textarea button select option); - -my %type2class = ( - text => "TextInput", - password => "TextInput", - hidden => "TextInput", - textarea => "TextInput", - - "reset" => "IgnoreInput", - - radio => "ListInput", - checkbox => "ListInput", - option => "ListInput", - - button => "SubmitInput", - submit => "SubmitInput", - image => "ImageInput", - file => "FileInput", - - keygen => "KeygenInput", -); - -=head1 NAME - -HTML::Form - Class that represents an HTML form element - -=head1 SYNOPSIS - - use HTML::Form; - $form = HTML::Form->parse($html, $base_uri); - $form->value(query => "Perl"); - - use LWP::UserAgent; - $ua = LWP::UserAgent->new; - $response = $ua->request($form->click); - -=head1 DESCRIPTION - -Objects of the C class represents a single HTML -CformE ... E/formE> instance. A form consists of a -sequence of inputs that usually have names, and which can take on -various values. The state of a form can be tweaked and it can then be -asked to provide C objects that can be passed to the -request() method of C. - -The following methods are available: - -=over 4 - -=item @forms = HTML::Form->parse( $html_document, $base_uri ) - -=item @forms = HTML::Form->parse( $html_document, base => $base_uri, %opt ) - -=item @forms = HTML::Form->parse( $response, %opt ) - -The parse() class method will parse an HTML document and build up -C objects for each
element found. If called in scalar -context only returns the first . Returns an empty list if there -are no forms to be found. - -The required arguments is the HTML document to parse ($html_document) and the -URI used to retrieve the document ($base_uri). The base URI is needed to resolve -relative action URIs. The provided HTML document should be a Unicode string -(or US-ASCII). - -By default HTML::Form assumes that the original document was UTF-8 encoded and -thus encode forms that don't specify an explict I as UTF-8. -The charset assumed can be overridden by providing the C option to -parse(). It's a good idea to be explict about this parameter as well, thus -the recommended simplest invocation becomes: - - my @forms = HTML::Form->parse( - Encode::decode($encoding, $html_document_bytes), - base => $base_uri, - charset => $encoding, - ); - -If the document was retrieved with LWP then the response object provide methods -to obtain a proper value for C and C: - - my $ua = LWP::UserAgent->new; - my $response = $ua->get("http://www.example.com/form.html"); - my @forms = HTML::Form->parse($response->decoded_content, - base => $response->base, - charset => $response->content_charset, - ); - -In fact, the parse() method can parse from an C object -directly, so the example above can be more conveniently written as: - - my $ua = LWP::UserAgent->new; - my $response = $ua->get("http://www.example.com/form.html"); - my @forms = HTML::Form->parse($response); - -Note that any object that implements a decoded_content(), base() and -content_charset() method with similar behaviour as C will do. - -Additional options might be passed in to control how the parse method -behaves. The following are all the options currently recognized: - -=over - -=item C<< base => $uri >> - -This is the URI used to retrive the original document. This option is not optional ;-) - -=item C<< charset => $str >> - -Specify what charset the original document was encoded in. This is used as -the default for accept_charset. If not provided this defaults to "UTF-8". - -=item C<< verbose => $bool >> - -Warn (print messages to STDERR) about any bad HTML form constructs found. -You can trap these with $SIG{__WARN__}. - -=item C<< strict => $bool >> - -Initialize any form objects with the given strict attribute. - -=back - -=cut - -sub parse -{ - my $class = shift; - my $html = shift; - unshift(@_, "base") if @_ == 1; - my %opt = @_; - - require HTML::TokeParser; - my $p = HTML::TokeParser->new(ref($html) ? $html->decoded_content(ref => 1) : \$html); - die "Failed to create HTML::TokeParser object" unless $p; - - my $base_uri = delete $opt{base}; - my $charset = delete $opt{charset}; - my $strict = delete $opt{strict}; - my $verbose = delete $opt{verbose}; - - if ($^W) { - Carp::carp("Unrecognized option $_ in HTML::Form->parse") for sort keys %opt; - } - - unless (defined $base_uri) { - if (ref($html)) { - $base_uri = $html->base; - } - else { - Carp::croak("HTML::Form::parse: No \$base_uri provided"); - } - } - unless (defined $charset) { - if (ref($html) and $html->can("content_charset")) { - $charset = $html->content_charset; - } - unless ($charset) { - $charset = "UTF-8"; - } - } - - my @forms; - my $f; # current form - - my %openselect; # index to the open instance of a select - - while (my $t = $p->get_tag) { - my($tag,$attr) = @$t; - if ($tag eq "form") { - my $action = delete $attr->{'action'}; - $action = "" unless defined $action; - $action = URI->new_abs($action, $base_uri); - $f = $class->new($attr->{'method'}, - $action, - $attr->{'enctype'}); - $f->accept_charset($attr->{'accept-charset'}) if $attr->{'accept-charset'}; - $f->{default_charset} = $charset; - $f->{attr} = $attr; - $f->strict(1) if $strict; - %openselect = (); - push(@forms, $f); - my(%labels, $current_label); - while (my $t = $p->get_tag) { - my($tag, $attr) = @$t; - last if $tag eq "/form"; - - # if we are inside a label tag, then keep - # appending any text to the current label - if(defined $current_label) { - $current_label = join " ", - grep { defined and length } - $current_label, - $p->get_phrase; - } - - if ($tag eq "input") { - $attr->{value_name} = - exists $attr->{id} && exists $labels{$attr->{id}} ? $labels{$attr->{id}} : - defined $current_label ? $current_label : - $p->get_phrase; - } - - if ($tag eq "label") { - $current_label = $p->get_phrase; - $labels{ $attr->{for} } = $current_label - if exists $attr->{for}; - } - elsif ($tag eq "/label") { - $current_label = undef; - } - elsif ($tag eq "input") { - my $type = delete $attr->{type} || "text"; - $f->push_input($type, $attr, $verbose); - } - elsif ($tag eq "button") { - my $type = delete $attr->{type} || "submit"; - $f->push_input($type, $attr, $verbose); - } - elsif ($tag eq "textarea") { - $attr->{textarea_value} = $attr->{value} - if exists $attr->{value}; - my $text = $p->get_text("/textarea"); - $attr->{value} = $text; - $f->push_input("textarea", $attr, $verbose); - } - elsif ($tag eq "select") { - # rename attributes reserved to come for the option tag - for ("value", "value_name") { - $attr->{"select_$_"} = delete $attr->{$_} - if exists $attr->{$_}; - } - # count this new select option separately - my $name = $attr->{name}; - $name = "" unless defined $name; - $openselect{$name}++; - - while ($t = $p->get_tag) { - my $tag = shift @$t; - last if $tag eq "/select"; - next if $tag =~ m,/?optgroup,; - next if $tag eq "/option"; - if ($tag eq "option") { - my %a = %{$t->[0]}; - # rename keys so they don't clash with %attr - for (keys %a) { - next if $_ eq "value"; - $a{"option_$_"} = delete $a{$_}; - } - while (my($k,$v) = each %$attr) { - $a{$k} = $v; - } - $a{value_name} = $p->get_trimmed_text; - $a{value} = delete $a{value_name} - unless defined $a{value}; - $a{idx} = $openselect{$name}; - $f->push_input("option", \%a, $verbose); - } - else { - warn("Bad here, so we - # try to do the same. Actually the MSIE behaviour - # appears really strange: and - - - - - -EOT - -#print $f->dump; -#print $f->click->as_string; - -ok($f->click->as_string, <<'EOT'); -POST http://localhost/ -Content-Length: 69 -Content-Type: application/x-www-form-urlencoded - -i.x=1&i.y=1&c=on&r=b&t=&p=&h=xyzzy&f=&x=&a=%0Aabc%0A+++&s=bar&m=a&m=b -EOT - -ok(@warn, 1); -ok($warn[0] =~ /^Unknown input type 'xyzzy'/); -@warn = (); - -$f = HTML::Form->parse(<<'EOT', "http://localhost/"); -
- - - -
-EOT - -#$f->dump; -ok($f->click->as_string, <<'EOT'); -GET http://localhost/?x=1&y=1&t=1 - -EOT - -# test file upload -$f = HTML::Form->parse(<<'EOT', "http://localhost/"); -
- - -
-EOT - -#print $f->dump; -#print $f->click->as_string; - -ok($f->click->as_string, <<'EOT'); -POST http://localhost/ -Content-Length: 0 -Content-Type: multipart/form-data; boundary=none - -EOT - -my $filename = sprintf "foo-%08d.txt", $$; -die if -e $filename; - -open(FILE, ">$filename") || die; -binmode(FILE); -print FILE "This is some text\n"; -close(FILE) || die; - -$f->value(f => $filename); - -#print $f->click->as_string; - -ok($f->click->as_string, <<"EOT"); -POST http://localhost/ -Content-Length: 139 -Content-Type: multipart/form-data; boundary=xYzZY - ---xYzZY\r -Content-Disposition: form-data; name="f"; filename="$filename"\r -Content-Type: text/plain\r -\r -This is some text -\r ---xYzZY--\r -EOT - -unlink($filename) || warn "Can't unlink '$filename': $!"; - -ok(@warn, 0); - -# Try to parse form HTTP::Response directly -{ - package MyResponse; - use vars qw(@ISA); - require HTTP::Response; - @ISA = ('HTTP::Response'); - - sub base { "http://www.example.com" } -} -my $response = MyResponse->new(200, 'OK'); -$response->content("
"); - -$f = HTML::Form->parse($response); - -ok($f->click->as_string, <<"EOT"); -GET http://www.example.com?x=42 - -EOT - -$f = HTML::Form->parse(< - I like it! - -EOT - -$f->find_input("x")->check; - -ok($f->click->as_string, <<"EOT"); -GET http://www.example.com?x=on - -EOT - -$f->value("x", "off"); -ok($f->click->as_string, <<"EOT"); -GET http://www.example.com - -EOT - -$f->value("x", "I like it!"); -ok($f->click->as_string, <<"EOT"); -GET http://www.example.com?x=on - -EOT - -$f->value("x", "I LIKE IT!"); -ok($f->click->as_string, <<"EOT"); -GET http://www.example.com?x=on - -EOT - -$f = HTML::Form->parse(< - - - -EOT - -$f->value("x", "one"); - -ok($f->click->as_string, <<"EOT"); -GET http://www.example.com?x=1 - -EOT - -$f->value("x", "TWO"); -ok($f->click->as_string, <<"EOT"); -GET http://www.example.com?x=2 - -EOT - -ok(join(":", $f->find_input("x")->value_names), "one:two:3"); -ok(join(":", map $_->name, $f->find_input(undef, "option")), "x:y"); - -$f = HTML::Form->parse(< - - - - -EOT - -ok($f->value("x"), 1); -ok($f->value("y"), 2); -ok($f->value("z"), 3); -ok($f->click->uri->query, "y=2&z=3"); - -my $input = $f->find_input("x"); -ok($input->type, "text"); -ok(!$input->readonly); -ok($input->disabled); -ok($input->disabled(0)); -ok(!$input->disabled); -ok($f->click->uri->query, "x=1&y=2&z=3"); - -$input = $f->find_input("y"); -ok($input->type, "text"); -ok($input->readonly); -ok(!$input->disabled); -$input->value(22); -ok($f->click->uri->query, "x=1&y=22&z=3"); - -$input->strict(1); -eval { - $input->value(23); -}; -ok($@ =~ /^Input 'y' is readonly/); - -ok($input->readonly(0)); -ok(!$input->readonly); - -$input->value(222); -ok(@warn, 0); -ok($f->click->uri->query, "x=1&y=222&z=3"); - -$input = $f->find_input("z"); -ok($input->type, "hidden"); -ok($input->readonly); -ok(!$input->disabled); - -$f = HTML::Form->parse(< - -one - -one -two -three - -one -two -three - - - - - - - - - - - - - - - - - - - -EOT -#print $f->dump; -ok($f->find_input("r0")->disabled); -ok(!eval {$f->value("r0", 1);}); -ok($@ && $@ =~ /^The value '1' has been disabled for field 'r0'/); -ok($f->find_input("r0")->disabled(0)); -ok(!$f->find_input("r0")->disabled); -ok($f->value("r0", 1), undef); -ok($f->value("r0"), 1); - -ok(!$f->find_input("r1")->disabled); -ok($f->value("r1", 2), undef); -ok($f->value("r1"), 2); -ok(!eval {$f->value("r1", 1);}); -ok($@ && $@ =~ /^The value '1' has been disabled for field 'r1'/); - -ok($f->value("r2", 1), undef); -ok(!eval {$f->value("r2", 2);}); -ok($@ && $@ =~ /^The value '2' has been disabled for field 'r2'/); -ok(!eval {$f->value("r2", "two");}); -ok($@ && $@ =~ /^The value 'two' has been disabled for field 'r2'/); -ok(!$f->find_input("r2")->disabled(1)); -ok(!eval {$f->value("r2", 1);}); -ok($@ && $@ =~ /^The value '1' has been disabled for field 'r2'/); -ok($f->find_input("r2")->disabled(0)); -ok(!$f->find_input("r2")->disabled); -ok($f->value("r2", 2), 1); - -ok($f->find_input("s0")->disabled); -ok(!$f->find_input("s1")->disabled); -ok(!$f->find_input("s2")->disabled); -ok($f->find_input("s3")->disabled); - -ok(!eval {$f->value("s1", 1);}); -ok($@ && $@ =~ /^The value '1' has been disabled for field 's1'/); - -ok($f->find_input("m0")->disabled); -ok($f->find_input("m1", undef, 1)->disabled); -ok(!$f->find_input("m1", undef, 2)->disabled); -ok(!$f->find_input("m1", undef, 3)->disabled); - -ok(!$f->find_input("m2", undef, 1)->disabled); -ok($f->find_input("m2", undef, 2)->disabled); -ok(!$f->find_input("m2", undef, 3)->disabled); - -ok($f->find_input("m3", undef, 1)->disabled); -ok($f->find_input("m3", undef, 2)->disabled); -ok($f->find_input("m3", undef, 3)->disabled); - -$f->find_input("m3", undef, 2)->disabled(0); -ok(!$f->find_input("m3", undef, 2)->disabled); -ok($f->find_input("m3", undef, 2)->value(2), undef); -ok($f->find_input("m3", undef, 2)->value(undef), 2); - -$f->find_input("m3", undef, 2)->disabled(1); -ok($f->find_input("m3", undef, 2)->disabled); -ok(eval{$f->find_input("m3", undef, 2)->value(2)}, undef); -ok($@ && $@ =~ /^The value '2' has been disabled/); -ok(eval{$f->find_input("m3", undef, 2)->value(undef)}, undef); -ok($@ && $@ =~ /^The 'm3' field can't be unchecked/); - -# multiple select with the same name [RT#18993] -$f = HTML::Form->parse(< - - - -EOT -ok(join("|", $f->form), "bug|hi|bug|mom|nobug|mom"); - -# Try a disabled radiobutton: -$f = HTML::Form->parse(< - - - - -EOT - -ok($f->click->as_string, <<'EOT'); -GET http://localhost/?f=b - -EOT - -$f = HTML::Form->parse(< -
- - -
-EOT - -ok($f->find_input("randomkey")); -ok($f->find_input("randomkey")->challenge, "1234567890"); -ok($f->find_input("randomkey")->keytype, "rsa"); -ok($f->click->as_string, <value(randomkey => "foo"); -ok($f->click->as_string, <parse(< - - -EOT - -ok($f); -ok($f->find_input("t")); - - -@f = HTML::Form->parse(< - - -EOT - -ok(@f, 2); -ok($f[0]->find_input("s")); -ok($f[1]->find_input("t")); - -$f = HTML::Form->parse(< -
- Radio Buttons with Labels - - - - - - -
- -EOT - -ok(join(":", $f->find_input("r0")->value_names), "zero"); -ok(join(":", $f->find_input("r1")->value_names), "one"); -ok(join(":", $f->find_input("r2")->value_names), "two"); -ok(join(":", $f->find_input("r3")->value_names), "nested"); -ok(join(":", $f->find_input("r4")->value_names), "before and after"); - -$f = HTML::Form->parse(< - - - - - - - - - - - - -
-     Keep me informed on the progress of this election - -
-
The place you are registered to vote: -
- County or Parish - - - - Note 2 -
- -EOT -ok(join(":", $f->find_input("keep_informed")->value_names), "off:"); - -$f = HTML::Form->parse(< - - - - - -EOT -ok(join(":", $f->find_input("test")->possible_values), "1:2"); -ok(join(":", $f->find_input("test")->other_possible_values), "2"); - -@warn = (); -$f = HTML::Form->parse(< - - -EOT -ok(@warn, 0); diff --git a/t/live/apache-http10.t b/t/live/apache-http10.t new file mode 100644 index 0000000..f6cf6bc --- /dev/null +++ b/t/live/apache-http10.t @@ -0,0 +1,16 @@ +#!perl -w + +use strict; +use Test; +plan tests => 1; + +use LWP::UserAgent; +my $ua = LWP::UserAgent->new; + +require HTTP::Request; +my $req = HTTP::Request->new(TRACE => "http://www.apache.org/"); +$req->protocol("HTTP/1.0"); +my $res = $ua->simple_request($req); +ok($res->content, qr/HTTP\/1.0/); + +$res->dump(prefix => "# "); diff --git a/t/live/apache-listing.t b/t/live/apache-listing.t deleted file mode 100644 index d79dfd4..0000000 --- a/t/live/apache-listing.t +++ /dev/null @@ -1,27 +0,0 @@ -#!perl -w - -use Test; - -use strict; -use File::Listing; -use LWP::Simple; - -# some sample URLs -my @urls = ( - "http://www.apache.org/dist/apr/?C=N&O=D", - "http://perl.apache.org/rpm/distrib/", - "http://www.cpan.org/modules/by-module/", - ); -plan tests => scalar(@urls); - -for my $url (@urls) { - print "# $url\n"; - my $dir = get($url); - unless ($dir) { - print "# Can't get document at $url\n"; - ok(0); - next; - } - my @listing = parse_dir($dir, undef, "apache"); - ok(@listing); -} diff --git a/t/live/apache.t b/t/live/apache.t deleted file mode 100644 index 33779f9..0000000 --- a/t/live/apache.t +++ /dev/null @@ -1,50 +0,0 @@ -#!perl -w - -use strict; -use Test; -plan tests => 6; - -use Net::HTTP; - - -my $s = Net::HTTP->new(Host => "www.apache.org", - KeepAlive => 1, - Timeout => 15, - PeerHTTPVersion => "1.1", - MaxLineLength => 512) || die "$@"; - -for (1..2) { - $s->write_request(TRACE => "/libwww-perl", - 'User-Agent' => 'Mozilla/5.0', - 'Accept-Language' => 'no,en', - Accept => '*/*'); - - my($code, $mess, %h) = $s->read_response_headers; - print "# $code $mess\n"; - for (sort keys %h) { - print "# $_: $h{$_}\n"; - } - print "\n"; - - ok($code, "200"); - ok($h{'Content-Type'}, "message/http"); - - my $buf; - while (1) { - my $tmp; - my $n = $s->read_entity_body($tmp, 20); - last unless $n; - $buf .= $tmp; - } - $buf =~ s/\r//g; - - ok($buf, <new(); -my $res = $ua->simple_request(HTTP::Request->new(GET => "https://www.apache.org")); - -if ($res->code == 501 && $res->message =~ /Protocol scheme 'https' is not supported/) { - print "1..0 # Skipped: " . $res->message . "\n"; - exit; -} - -plan tests => 2; -ok($res->is_success); -ok($res->content =~ /Apache Software Foundation/); - -$res->dump(prefix => "# "); diff --git a/t/live/jigsaw-auth-b.t b/t/live/jigsaw/auth-b.t similarity index 100% rename from t/live/jigsaw-auth-b.t rename to t/live/jigsaw/auth-b.t diff --git a/t/live/jigsaw-auth-d.t b/t/live/jigsaw/auth-d.t similarity index 100% rename from t/live/jigsaw-auth-d.t rename to t/live/jigsaw/auth-d.t diff --git a/t/live/jigsaw-chunk.t b/t/live/jigsaw/chunk.t similarity index 100% rename from t/live/jigsaw-chunk.t rename to t/live/jigsaw/chunk.t diff --git a/t/live/jigsaw-md5-get.t b/t/live/jigsaw/md5-get.t similarity index 100% rename from t/live/jigsaw-md5-get.t rename to t/live/jigsaw/md5-get.t diff --git a/t/live/jigsaw-md5.t b/t/live/jigsaw/md5.t similarity index 100% rename from t/live/jigsaw-md5.t rename to t/live/jigsaw/md5.t diff --git a/t/live/jigsaw-neg-get.t b/t/live/jigsaw/neg-get.t similarity index 100% rename from t/live/jigsaw-neg-get.t rename to t/live/jigsaw/neg-get.t diff --git a/t/live/jigsaw-neg.t b/t/live/jigsaw/neg.t similarity index 100% rename from t/live/jigsaw-neg.t rename to t/live/jigsaw/neg.t diff --git a/t/live/jigsaw-te.t b/t/live/jigsaw/te.t similarity index 100% rename from t/live/jigsaw-te.t rename to t/live/jigsaw/te.t diff --git a/t/live/online.t b/t/live/online.t new file mode 100644 index 0000000..750e94b --- /dev/null +++ b/t/live/online.t @@ -0,0 +1,13 @@ +#!perl -w + +use strict; +use Test; +plan tests => 2; + +use LWP::UserAgent; +my $ua = LWP::UserAgent->new; + +ok $ua->is_online; + +$ua->protocols_allowed([]); +ok !$ua->is_online; diff --git a/t/local/chunked.t b/t/local/chunked.t deleted file mode 100644 index e11799f..0000000 --- a/t/local/chunked.t +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use Config; -use HTTP::Daemon; -use Test::More; -# use Time::HiRes qw(sleep); -our $CRLF; -use Socket qw($CRLF); - -our $LOGGING = 0; - -our @TESTS = ( - { - expect => 629, - comment => "traditional, unchunked POST request", - raw => "POST /cgi-bin/redir-TE.pl HTTP/1.1 -User-Agent: UNTRUSTED/1.0 -Content-Type: application/x-www-form-urlencoded -Content-Length: 629 -Host: localhost - -JSR-205=0;font_small=15;png=1;jpg=1;alpha_channel=256;JSR-82=0;JSR-135=1;mot-wt=0;JSR-75-pim=0;pointer_motion_event=0;camera=1;free_memory=455472;heap_size=524284;cldc=CLDC-1.1;canvas_size_y=176;canvas_size_x=176;double_buffered=1;color=65536;JSR-120=1;JSR-184=1;JSR-180=0;JSR-75-file=0;push_socket=0;pointer_event=0;nokia-ui=1;java_platform=xxxxxxxxxxxxxxxxx/xxxxxxx;gif=1;midp=MIDP-1.0 MIDP-2.0;font_large=22;sie-col-game=0;JSR-179=0;push_sms=1;JSR-172=0;font_medium=18;fullscreen_canvas_size_y=220;fullscreen_canvas_size_x=176;java_locale=de;video_encoding=encoding=JPEG&width=176&height=182encoding=JPEG&width=176&height=220;" - }, - { - expect => 8, - comment => "chunked with illegal Content-Length header; tiny message", - raw => "POST /cgi-bin/redir-TE.pl HTTP/1.1 -Host: localhost -Content-Type: application/x-www-form-urlencoded -Content-Length: 8 -Transfer-Encoding: chunked - -8 -icm.x=u2 -0 - -", - }, - { - expect => 868, - comment => "chunked with illegal Content-Length header; medium sized", - raw => "POST /cgi-bin/redir-TE.pl HTTP/1.1 -Host:dev05 -Connection:close -Content-Type:application/x-www-form-urlencoded -Content-Length:868 -transfer-encoding:chunked - -364 -JSR-205=0;font_small=20;png=1;jpg=1;JSR-82=0;JSR-135=1;mot-wt=0;JSR-75-pim=0;http=1;pointer_motion_event=0;browser_launch=1;free_memory=733456;user_agent=xxxxxxxxx/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;heap_size=815080;cldc=CLDC-1.0;canvas_size_y=182;canvas_size_x=176;double_buffered=1;NAVIGATION PRESS=20;JSR-184=0;JSR-120=1;color=32768;JSR-180=0;JSR-75-file=0;RIGHT SOFT KEY=22;NAVIGATION RIGHT=5;KEY *=42;push_socket=0;pointer_event=0;KEY #=35;KEY NUM 9=57;nokia-ui=0;KEY NUM 8=56;KEY NUM 7=55;KEY NUM 6=54;KEY NUM 5=53;gif=1;KEY NUM 4=52;NAVIGATION UP=1;KEY NUM 3=51;KEY NUM 2=50;KEY NUM 1=49;midp=MIDP-2.0 VSCL-1.1.0;font_large=20;KEY NUM 0=48;sie-col-game=0;JSR-179=0;push_sms=1;JSR-172=0;NAVIGATION LEFT=2;LEFT SOFT KEY=21;font_medium=20;fullscreen_canvas_size_y=204;fullscreen_canvas_size_x=176;https=1;NAVIGATION DOWN=6;java_locale=en-DE; -0 - -", - }, - { - expect => 1104, - comment => "chunked correctly, size ~1k; base for the big next test", - raw => "POST /cgi-bin/redir-TE.pl HTTP/1.1 -User-Agent: UNTRUSTED/1.0 -Content-Type: application/x-www-form-urlencoded -Host: localhost:80 -Transfer-Encoding: chunked - -450 -JSR-205=0;font_small=15;png=1;jpg=1;jsr184_dithering=0;CLEAR/DELETE=-8;JSR-82=0;alpha_channel=32;JSR-135=1;mot-wt=0;JSR-75-pim=0;http=1;pointer_motion_event=0;browser_launch=1;BACK/RETURN=-11;camera=1;free_memory=456248;user_agent=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;heap_size=524284;cldc=CLDC-1.1;canvas_size_y=176;canvas_size_x=176;double_buffered=1;NAVIGATION PRESS=-5;JSR-184=1;JSR-120=1;color=65536;JSR-180=0;JSR-75-file=0;RIGHT SOFT KEY=-7;NAVIGATION RIGHT=-4;KEY *=42;push_socket=0;pointer_event=0;KEY #=35;KEY NUM 9=57;nokia-ui=1;KEY NUM 8=56;KEY NUM 7=55;KEY NUM 6=54;KEY NUM 5=53;java_platform=xxxxxxxxxxxxxxxxx/xxxxxxx;KEY NUM 4=52;gif=1;KEY NUM 3=51;NAVIGATION UP=-1;KEY NUM 2=50;KEY NUM 1=49;midp=MIDP-1.0 MIDP-2.0;font_large=22;KEY NUM 0=48;sie-col-game=0;JSR-179=0;push_sms=1;JSR-172=0;NAVIGATION LEFT=-3;LEFT SOFT KEY=-6;jsr184_antialiasing=0;font_medium=18;fullscreen_canvas_size_y=220;fullscreen_canvas_size_x=176;https=1;NAVIGATION DOWN=-2;java_locale=de;video_encoding=encoding=JPEG&width=176&height=182encoding=JPEG&width=176&height=220; -0 - -" - }, - { - expect => 1104*1024, - comment => "chunked with many chunks", - raw => ("POST /cgi-bin/redir-TE.pl HTTP/1.1 -User-Agent: UNTRUSTED/1.0 -Content-Type: application/x-www-form-urlencoded -Host: localhost:80 -Transfer-Encoding: chunked - -".("450 -JSR-205=0;font_small=15;png=1;jpg=1;jsr184_dithering=0;CLEAR/DELETE=-8;JSR-82=0;alpha_channel=32;JSR-135=1;mot-wt=0;JSR-75-pim=0;http=1;pointer_motion_event=0;browser_launch=1;BACK/RETURN=-11;camera=1;free_memory=456248;user_agent=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;heap_size=524284;cldc=CLDC-1.1;canvas_size_y=176;canvas_size_x=176;double_buffered=1;NAVIGATION PRESS=-5;JSR-184=1;JSR-120=1;color=65536;JSR-180=0;JSR-75-file=0;RIGHT SOFT KEY=-7;NAVIGATION RIGHT=-4;KEY *=42;push_socket=0;pointer_event=0;KEY #=35;KEY NUM 9=57;nokia-ui=1;KEY NUM 8=56;KEY NUM 7=55;KEY NUM 6=54;KEY NUM 5=53;java_platform=xxxxxxxxxxxxxxxxx/xxxxxxx;KEY NUM 4=52;gif=1;KEY NUM 3=51;NAVIGATION UP=-1;KEY NUM 2=50;KEY NUM 1=49;midp=MIDP-1.0 MIDP-2.0;font_large=22;KEY NUM 0=48;sie-col-game=0;JSR-179=0;push_sms=1;JSR-172=0;NAVIGATION LEFT=-3;LEFT SOFT KEY=-6;jsr184_antialiasing=0;font_medium=18;fullscreen_canvas_size_y=220;fullscreen_canvas_size_x=176;https=1;NAVIGATION DOWN=-2;java_locale=de;video_encoding=encoding=JPEG&width=176&height=182encoding=JPEG&width=176&height=220; -"x1024)."0 - -") - }, - ); - - -my $can_fork = $Config{d_fork} || - (($^O eq 'MSWin32' || $^O eq 'NetWare') and - $Config{useithreads} and $Config{ccflags} =~ /-DPERL_IMPLICIT_SYS/); - -my $tests = @TESTS; -my $tport = 8333; - -my $tsock = IO::Socket::INET->new(LocalAddr => '0.0.0.0', - LocalPort => $tport, - Listen => 1, - ReuseAddr => 1); -if (!$can_fork) { - plan skip_all => "This system cannot fork"; -} -elsif (!$tsock) { - plan skip_all => "Cannot listen on 0.0.0.0:$tport"; -} -else { - close $tsock; - plan tests => $tests; -} - -sub mywarn ($) { - return unless $LOGGING; - my($mess) = @_; - open my $fh, ">>", "http-daemon.out" - or die $!; - my $ts = localtime; - print $fh "$ts: $mess\n"; - close $fh or die $!; -} - - -my $pid; -if ($pid = fork) { - sleep 4; - for my $t (0..$#TESTS) { - my $test = $TESTS[$t]; - my $raw = $test->{raw}; - $raw =~ s/\r?\n/$CRLF/mg; - if (0) { - open my $fh, "| socket localhost $tport" or die; - print $fh $test; - } - use IO::Socket::INET; - my $sock = IO::Socket::INET->new( - PeerAddr => "127.0.0.1", - PeerPort => $tport, - ) or die; - if (0) { - for my $pos (0..length($raw)-1) { - print $sock substr($raw,$pos,1); - sleep 0.001; - } - } else { - print $sock $raw; - } - local $/; - my $resp = <$sock>; - close $sock; - my($got) = $resp =~ /\r?\n\r?\n(\d+)/s; - is($got, - $test->{expect}, - "[$test->{expect}] $test->{comment}", - ); - } - wait; -} else { - die "cannot fork: $!" unless defined $pid; - my $d = HTTP::Daemon->new( - LocalAddr => '0.0.0.0', - LocalPort => $tport, - ReuseAddr => 1, - ) or die; - mywarn "Starting new daemon as '$$'"; - my $i; - LISTEN: while (my $c = $d->accept) { - my $r = $c->get_request; - mywarn sprintf "headers[%s] content[%s]", $r->headers->as_string, $r->content; - my $res = HTTP::Response->new(200,undef,undef,length($r->content).$CRLF); - $c->send_response($res); - $c->force_last_request; # we're just not mature enough - $c->close; - undef($c); - last if ++$i >= $tests; - } -} - - - -# Local Variables: -# mode: cperl -# cperl-indent-level: 2 -# End: diff --git a/t/local/http.t b/t/local/http.t index 421e7a3..081a0fc 100644 --- a/t/local/http.t +++ b/t/local/http.t @@ -8,6 +8,8 @@ unless (-f "CAN_TALK_TO_OURSELF") { exit; } +delete $ENV{PERL_LWP_ENV_PROXY}; + $| = 1; # autoflush require IO::Socket; # make sure this work before we try to make a HTTP::Daemon @@ -47,8 +49,8 @@ else { open(DAEMON, "$perl local/http.t daemon |") or die "Can't exec daemon: $!"; } -use Test; -plan tests => 54; +use Test::More; +plan tests => 59; my $greeting = ; $greeting =~ /(<[^>]+>)/; @@ -75,9 +77,9 @@ $req = new HTTP::Request GET => url("/not_found", $base); $req->header(X_Foo => "Bar"); $res = $ua->request($req); -ok($res->is_error); -ok($res->code, 404); -ok($res->message, qr/not\s+found/i); +ok($res->is_error, 'is_error'); +is($res->code, 404, 'response code 404'); +like($res->message, qr/not\s+found/i, '404 message'); # we also expect a few headers ok($res->server); ok($res->date); @@ -108,23 +110,23 @@ $res = $ua->request($req); #print $res->as_string; ok($res->is_success); -ok($res->code, 200); -ok($res->message, "OK"); +is($res->code, 200, 'status code 200'); +is($res->message, "OK", 'message OK'); $_ = $res->content; @accept = /^Accept:\s*(.*)/mg; -ok($_, qr/^From:\s*gisle\@aas\.no\n/m); -ok($_, qr/^Host:/m); -ok(@accept, 3); -ok($_, qr/^Accept:\s*text\/html/m); -ok($_, qr/^Accept:\s*text\/plain/m); -ok($_, qr/^Accept:\s*image\/\*/m); -ok($_, qr/^If-Modified-Since:\s*\w{3},\s+\d+/m); -ok($_, qr/^Long-Text:\s*This.*broken between/m); -ok($_, qr/^Foo-Bar:\s*1\n/m); -ok($_, qr/^X-Foo:\s*Bar\n/m); -ok($_, qr/^User-Agent:\s*Mozilla\/0.01/m); +like($_, qr/^From:\s*gisle\@aas\.no\n/m); +like($_, qr/^Host:/m); +is(@accept, 3, '3 items in accept'); +like($_, qr/^Accept:\s*text\/html/m); +like($_, qr/^Accept:\s*text\/plain/m); +like($_, qr/^Accept:\s*image\/\*/m); +like($_, qr/^If-Modified-Since:\s*\w{3},\s+\d+/m); +like($_, qr/^Long-Text:\s*This.*broken between/m); +like($_, qr/^Foo-Bar:\s*1\n/m); +like($_, qr/^X-Foo:\s*Bar\n/m); +like($_, qr/^User-Agent:\s*Mozilla\/0.01/m); # Try it with the higher level 'get' interface $res = $ua->get(url("/echo/path_info?query", $base), @@ -134,7 +136,50 @@ $res = $ua->get(url("/echo/path_info?query", $base), X_Foo => "Bar", ); #$res->dump; -ok($res->code, 200); +is($res->code, 200, 'response code 200'); + +#---------------------------------------------------------------- +print "UserAgent->put...\n"; +sub httpd_put_echo +{ + my($c, $req) = @_; + $c->send_basic_header(200); + print $c "Content-Type: message/http\015\012"; + $c->send_crlf; + print $c $req->as_string; +} +ok($res->content, qr/^From: gisle\@aas.no$/m); +# Try it with the higher level 'get' interface +$res = $ua->put(url("/echo/path_info?query", $base), + Accept => 'text/html', + Accept => 'text/plain; q=0.9', + Accept => 'image/*', + X_Foo => "Bar", +); +#$res->dump; +is($res->code, 200, 'response code 200'); +ok($res->content, qr/^From: gisle\@aas.no$/m); + +#---------------------------------------------------------------- +print "UserAgent->delete...\n"; +sub httpd_delete_echo +{ + my($c, $req) = @_; + $c->send_basic_header(200); + print $c "Content-Type: message/http\015\012"; + $c->send_crlf; + print $c $req->as_string; +} +ok($res->content, qr/^From: gisle\@aas.no$/m); +# Try it with the higher level 'get' interface +$res = $ua->delete(url("/echo/path_info?query", $base), + Accept => 'text/html', + Accept => 'text/plain; q=0.9', + Accept => 'image/*', + X_Foo => "Bar", +); +#$res->dump; +is($res->code, 200, 'response code 200'); ok($res->content, qr/^From: gisle\@aas.no$/m); #---------------------------------------------------------------- @@ -166,7 +211,7 @@ $res = $ua->request($req); ok($res->is_success); ok($res->content_type, 'text/html'); -ok($res->content_length, 147); +is($res->content_length, 147, '147 content length'); ok($res->title, 'En prøve'); ok($res->content, qr/å være/); @@ -174,13 +219,13 @@ ok($res->content, qr/ $res = $ua->request($req); #print $res->as_string; ok($res->is_error); -ok($res->code, 404); # not found - +is($res->code, 404, 'response code 404'); # not found + # Then try to list current directory $req = new HTTP::Request GET => url("/file?name=.", $base); $res = $ua->request($req); #print $res->as_string; -ok($res->code, 501); # NYI +is($res->code, 501, 'response code 501'); # NYI #---------------------------------------------------------------- @@ -198,7 +243,7 @@ $res = $ua->request($req); ok($res->is_success); ok($res->content, qr|/echo/redirect|); ok($res->previous->is_redirect); -ok($res->previous->code, 301); +is($res->previous->code, 301, 'response code 301'); # Let's test a redirect loop too sub httpd_get_redirect2 { shift->send_redirect("/redirect3/") } @@ -210,12 +255,12 @@ $res = $ua->request($req); #print $res->as_string; ok($res->is_redirect); ok($res->header("Client-Warning"), qr/loop detected/i); -ok($res->redirects, 5); +is($res->redirects, 5, '5 max redirects'); $ua->max_redirect(0); $res = $ua->request($req); -ok($res->previous, undef); -ok($res->redirects, 0); +is($res->previous, undef, 'undefined previous'); +is($res->redirects, 0, 'zero redirects'); $ua->max_redirect(5); #---------------------------------------------------------------- @@ -260,7 +305,7 @@ ok($res->is_success); # Let's try with a $ua that does not pass out credentials $res = $ua->request($req); -ok($res->code, 401); +is($res->code, 401, 'respone code 401'); # Let's try to set credentials for this realm $ua->credentials($req->uri->host_port, "libwww-perl", "ok 12", "xyzzy"); @@ -270,7 +315,7 @@ ok($res->is_success); # Then illegal credentials $ua->credentials($req->uri->host_port, "libwww-perl", "user", "passwd"); $res = $ua->request($req); -ok($res->code, 401); +is($res->code, 401, 'response code 401'); #---------------------------------------------------------------- @@ -376,5 +421,5 @@ sub httpd_get_quit $req = new HTTP::Request GET => url("/quit", $base); $res = $ua->request($req); -ok($res->code, 503); +is($res->code, 503, 'response code is 503'); ok($res->content, qr/Bye, bye/); diff --git a/t/robot/rules-dbm.t b/t/robot/rules-dbm.t deleted file mode 100644 index 2335b94..0000000 --- a/t/robot/rules-dbm.t +++ /dev/null @@ -1,128 +0,0 @@ - -print "1..13\n"; - - -use WWW::RobotRules::AnyDBM_File; - -$file = "test-$$"; - -$r = new WWW::RobotRules::AnyDBM_File "myrobot/2.0", $file; - -$r->parse("http://www.aas.no/robots.txt", ""); - -$r->visit("www.aas.no:80"); - -print "not " if $r->no_visits("www.aas.no:80") != 1; -print "ok 1\n"; - - -$r->push_rules("www.sn.no:80", "/aas", "/per"); -$r->push_rules("www.sn.no:80", "/god", "/old"); - -@r = $r->rules("www.sn.no:80"); -print "Rules: @r\n"; - -print "not " if "@r" ne "/aas /per /god /old"; -print "ok 2\n"; - -$r->clear_rules("per"); -$r->clear_rules("www.sn.no:80"); - -@r = $r->rules("www.sn.no:80"); -print "Rules: @r\n"; - -print "not " if "@r" ne ""; -print "ok 3\n"; - -$r->visit("www.aas.no:80", time+10); -$r->visit("www.sn.no:80"); - -print "No visits: ", $r->no_visits("www.aas.no:80"), "\n"; -print "Last visit: ", $r->last_visit("www.aas.no:80"), "\n"; -print "Fresh until: ", $r->fresh_until("www.aas.no:80"), "\n"; - -print "not " if $r->no_visits("www.aas.no:80") != 2; -print "ok 4\n"; - -print "not " if abs($r->last_visit("www.sn.no:80") - time) > 2; -print "ok 5\n"; - -$r = undef; - -# Try to reopen the database without a name specified -$r = new WWW::RobotRules::AnyDBM_File undef, $file; -$r->visit("www.aas.no:80"); - -print "not " if $r->no_visits("www.aas.no:80") != 3; -print "ok 6\n"; - -print "Agent-Name: ", $r->agent, "\n"; -print "not " if $r->agent ne "myrobot"; -print "ok 7\n"; - -$r = undef; - -print "*** Dump of database ***\n"; -tie(%cat, AnyDBM_File, $file, 0, 0644) or die "Can't tie: $!"; -while (($key,$val) = each(%cat)) { - print "$key\t$val\n"; -} -print "******\n"; - -untie %cat; - -# Try to open database with a different agent name -$r = new WWW::RobotRules::AnyDBM_File "MOMSpider/2.0", $file; - -print "not " if $r->no_visits("www.sn.no:80"); -print "ok 8\n"; - -# Try parsing -$r->parse("http://www.sn.no:8080/robots.txt", <rules("www.sn.no:8080"); -print "not " if "@r" ne "/foo /bar"; -print "ok 9\n"; - -print "not " if $r->allowed("http://www.sn.no") >= 0; -print "ok 10\n"; - -print "not " if $r->allowed("http://www.sn.no:8080/foo/gisle"); -print "ok 11\n"; - -sleep(5); # wait until file has expired -print "not " if $r->allowed("http://www.sn.no:8080/foo/gisle") >= 0; -print "ok 12\n"; - - -$r = undef; - -print "*** Dump of database ***\n"; -tie(%cat, AnyDBM_File, $file, 0, 0644) or die "Can't tie: $!"; -while (($key,$val) = each(%cat)) { - print "$key\t$val\n"; -} -print "******\n"; - -untie %cat; # Otherwise the next line fails on DOSish - -while (unlink("$file", "$file.pag", "$file.dir", "$file.db")) {} - -# Try open a an emty database without specifying a name -eval { - $r = new WWW::RobotRules::AnyDBM_File undef, $file; -}; -print $@; -print "not " unless $@; # should fail -print "ok 13\n"; - -unlink "$file", "$file.pag", "$file.dir", "$file.db"; diff --git a/t/robot/rules.t b/t/robot/rules.t deleted file mode 100644 index 26b1025..0000000 --- a/t/robot/rules.t +++ /dev/null @@ -1,230 +0,0 @@ -#!/local/bin/perl - -=head1 NAME - -robot-rules.t - -=head1 DESCRIPTION - -Test a number of different A files against a number -of different User-agents. - -=cut - -require WWW::RobotRules; -use Carp; -use strict; - -print "1..50\n"; # for Test::Harness - -# We test a number of different /robots.txt files, -# - -my $content1 = < - 1 => 'http://foo/private' => 1, - 2 => 'http://foo/also_private' => 1, - ], - - [$content1, 'Wubble' => - 3 => 'http://foo/private' => 0, - 4 => 'http://foo/also_private' => 0, - 5 => 'http://foo/other' => 1, - ], - - [$content2, 'MOMspider' => - 6 => 'http://foo/private' => 0, - 7 => 'http://foo/other' => 1, - ], - - [$content2, 'Wubble' => - 8 => 'http://foo/private' => 1, - 9 => 'http://foo/also_private' => 1, - 10 => 'http://foo/other' => 1, - ], - - [$content3, 'MOMspider' => - 11 => 'http://foo/private' => 1, - 12 => 'http://foo/other' => 1, - ], - - [$content3, 'Wubble' => - 13 => 'http://foo/private' => 1, - 14 => 'http://foo/other' => 1, - ], - - [$content4, 'MOMspider' => - 15 => 'http://foo/private' => 1, - 16 => 'http://foo/this' => 0, - 17 => 'http://foo/that' => 1, - ], - - [$content4, 'Another' => - 18 => 'http://foo/private' => 1, - 19 => 'http://foo/this' => 1, - 20 => 'http://foo/that' => 0, - ], - - [$content4, 'Wubble' => - 21 => 'http://foo/private' => 0, - 22 => 'http://foo/this' => 1, - 23 => 'http://foo/that' => 1, - ], - - [$content4, 'Another/1.0' => - 24 => 'http://foo/private' => 1, - 25 => 'http://foo/this' => 1, - 26 => 'http://foo/that' => 0, - ], - - [$content4, "SvartEnke1" => - 27 => "http://foo/" => 0, - 28 => "http://foo/this" => 0, - 29 => "http://bar/" => 1, - ], - - [$content4, "SvartEnke2" => - 30 => "http://foo/" => 1, - 31 => "http://foo/this" => 1, - 32 => "http://bar/" => 1, - ], - - [$content4, "MomSpiderJr" => # should match "MomSpider" - 33 => 'http://foo/private' => 1, - 34 => 'http://foo/also_private' => 1, - 35 => 'http://foo/this/' => 0, - ], - - [$content4, "SvartEnk" => # should match "*" - 36 => "http://foo/" => 1, - 37 => "http://foo/private/" => 0, - 38 => "http://bar/" => 1, - ], - - [$content5, 'Villager/1.0' => - 39 => 'http://foo/west-wing/' => 0, - 40 => 'http://foo/' => 0, - ], - - [$content5, 'Belle/2.0' => - 41 => 'http://foo/west-wing/' => 0, - 42 => 'http://foo/' => 1, - ], - - [$content5, 'Beast/3.0' => - 43 => 'http://foo/west-wing/' => 1, - 44 => 'http://foo/' => 1, - ], - - [$content6, 'Villager/1.0' => - 45 => 'http://foo/west-wing/' => 0, - 46 => 'http://foo/' => 0, - ], - - [$content6, 'Belle/2.0' => - 47 => 'http://foo/west-wing/' => 0, - 48 => 'http://foo/' => 1, - ], - - [$content6, 'Beast/3.0' => - 49 => 'http://foo/west-wing/' => 1, - 50 => 'http://foo/' => 1, - ], - - # when adding tests, remember to increase - # the maximum at the top - - ); - -my $t; - -for $t (@tests1) { - my ($content, $ua) = splice(@$t, 0, 2); - - my $robotsrules = new WWW::RobotRules($ua); - $robotsrules->parse('http://foo/robots.txt', $content); - - my ($num, $path, $expected); - while(($num, $path, $expected) = splice(@$t, 0, 3)) { - my $allowed = $robotsrules->allowed($path); - $allowed = 1 if $allowed; - if($allowed != $expected) { - $robotsrules->dump; - confess "Test Failed: $ua => $path ($allowed != $expected)"; - } - print "ok $num\n"; - } -} diff --git a/t/robot/ua-get.t b/t/robot/ua-get.t index 5c18afa..5754c4b 100644 --- a/t/robot/ua-get.t +++ b/t/robot/ua-get.t @@ -8,6 +8,8 @@ unless (-f "CAN_TALK_TO_OURSELF") { exit; } +delete $ENV{PERL_LWP_ENV_PROXY}; + $| = 1; # autoflush require IO::Socket; # make sure this work before we try to make a HTTP::Daemon diff --git a/t/robot/ua.t b/t/robot/ua.t index 5f679ae..21ad5c8 100644 --- a/t/robot/ua.t +++ b/t/robot/ua.t @@ -8,6 +8,8 @@ unless (-f "CAN_TALK_TO_OURSELF") { exit; } +delete $ENV{PERL_LWP_ENV_PROXY}; + $| = 1; # autoflush require IO::Socket; # make sure this work before we try to make a HTTP::Daemon -- 2.34.1