--- /dev/null
+Adam Newby <adam@NewsNow.co.uk>
+Albert Dvornik <bert@genscan.com>
+Alexandre Duret-Lutz <duret_g@lrde.epita.fr>
+Andreas Gustafsson <gson@araneus.fi>
+Andreas König <andreas.koenig@anima.de>
+Andreas König <koenig@mind.de>
+Andrew Pimlott <andrew@pimlott.net>
+Andy Lester <andy@petdance.com>
+Ben Coleman <bcoleman@mindspring.com>
+Benjamin Low <ben@snrc.uow.edu.au>
+Ben Low <ben@snrc.uow.edu.au>
+Ben Tilly <Ben_Tilly@trepp.com>
+Blair Zajac <blair@gps.caltech.edu>
+Blair Zajac <blair@orcaware.com>
+Bob Dalgleish
+BooK <book@netcourrier.com>
+Brad Hughes <brad@tmc.naecker.com>
+Brian J. Murrell
+Brian McCauley <B.A.McCauley@bham.ac.uk>
+Charles C. Fu <ccwf@bacchus.com>
+Charles Lane <lane@DUPHY4.Physics.Drexel.Edu>
+Chris Nandor <pudge@pobox.com>
+Christian Gilmore <cgilmore@tivoli.com>
+Chris W. Unger <cunger@cas.org>
+Craig Macdonald <craig@freeasphost.co.uk>
+Dale Couch <dcouch@training.orl.lmco.com>
+Dan Kubb <dan.kubb@onautopilot.com>
+Dave Dunkin <dave_dunkin@hotmail.com>
+Dave W. Smith <dws@postcognitive.com>
+David Coppit <david@coppit.org>
+David Dick <david_dick@iprimus.com.au>
+David D. Kilzer <ddkilzer@madison.dseg.ti.com>
+Doug MacEachern <dougm@covalent.net>
+Doug MacEachern <dougm@osf.org>
+Doug MacEachern <dougm@pobox.com>
+Edward Avis <epa98@doc.ic.ac.uk>
+<erik@mediator.uni-c.dk>
+Gary Shea <shea@gtsdesign.com>
+Gisle Aas <aas@oslonett.no>
+Gisle Aas <aas@sn.no>
+Gisle Aas <gisle@aas.no>
+Gisle Aas <gisle@ActiveState.com>
+Graham Barr
+Gurusamy Sarathy <gsar@ActiveState.com>
+Gurusamy Sarathy <gsar@engin.umich.edu>
+Hans de Graaff <hans@degraaff.org>
+Harald Joerg <haj@oook.m.uunet.de>
+Harry Bochner <bochner@das.harvard.edu>
+Hugo <hv@crypt.compulink.co.uk>
+Ilya Zakharevich
+INOUE Yoshinari <inoue@kusm.kyoto-u.ac.jp>
+Ivan Panchenko
+Jack Shirazi
+James Tillman
+Jan Dubois <jand@ActiveState.com>
+Jared Rhine
+Jim Stern <jstern@world.northgrum.com>
+Joao Lopes <developer@st3tailor.com.br>
+John Klar <j.klar@xpedite.com>
+Johnny Lee <typo_pl@hotmail.com>
+Josh Kronengold <mneme@mcny.com>
+Josh Rai <josh@rai.name>
+Joshua Chamas <joshua@chamas.com>
+Joshua Hoblitt <jhoblitt@ifa.hawaii.edu>
+Kartik Subbarao <subbarao@computer.org>
+Keiichiro Nagano <knagano@sodan.org>
+Ken Williams <ken@mathforum.org>
+KONISHI Katsuhiro <konishi@din.or.jp>
+Lee T Lindley <Lee.Lindley@viasystems.com>
+Liam Quinn <liam@htmlhelp.com>
+Marc Hedlund <hedlund@best.com>
+Marc Langheinrich <marc@ccm.cl.nec.co.jp>
+Mark D. Anderson <mda@discerning.com>
+Marko Asplund <aspa@hip.fi>
+Mark Stosberg <markstos@cpan.org>
+Markus B Krüger <markusk@pvv.org>
+Markus Laker <mlaker@contax.co.uk>
+Martijn Koster <m.koster@nexor.co.uk>
+Martin Thurn <mthurn@northropgrumman.com>
+Matthew Eldridge <eldridge@Graphics.Stanford.EDU>
+<Matthew.van.Eerde@hbinc.com>
+Matt Sergeant <matt-news@sergeant.org>
+Michael A. Chase <mchase@ix.netcom.com>
+Michael Quaranta <quaranta@vnet.IBM.COM>
+Michael Thompson <mickey@berkeley.innomedia.com>
+Mike Schilli <schilli1@pacbell.net>
+Moshe Kaminsky <kaminsky@math.huji.ac.il>
+Nathan Torkington <gnat@frii.com>
+Nicolai Langfeldt <janl@ifi.uio.no>
+Nicolai Langfeldt <janl@math.uio.no>
+Norton Allen <allen@huarp.harvard.edu>
+Olly Betts <olly@muscat.co.uk>
+Paul J. Schinder <schinder@leprss.gsfc.nasa.gov>
+<peterm@zeta.org.au>
+Philip GuentherDaniel Buenzli <buenzli@rzu.unizh.ch>
+Pon Hwa Lin <koala@fragment.com>
+Radoslaw Zielinski <radek@karnet.pl>
+Radu Greab <radu@netsoft.ro>
+Randal L. Schwartz <merlyn@stonehenge.com>
+Richard Chen <richard@lexitech.com>
+Robin Barker <Robin.Barker@npl.co.uk>
+Roy Fielding <fielding@beach.w3.org>
+Sander van Zoest <sander@covalent.net>
+Sean M. Burke <sburke@cpan.org>
+<shildreth@emsphone.com>
+Slaven Rezic <slaven@rezic.de>
+Steve A Fink <steve@fink.com>
+Steve Hay <steve.hay@uk.radan.com>
+Steven Butler <stevenb@kjross.com.au>
+<Steve_Kilbane@cegelecproj.co.uk>
+Takanori Ugai <ugai@jp.fujitsu.com>
+Thomas Lotterer <thl@dev.de.cw.com>
+Tim Bunce
+Tom Hughes <thh@cyberscience.com>
+Tony Finch <fanf@demon.net>
+Ville Skyttä <ville.skytta@iki.fi>
+Ward Vandewege <ward@pong.be>
+William York <william@mathworks.com>
+Yale Huang <yale@sdf-eu.org>
+Yitzchak Scott-Thoennes <sthoenna@efn.org>
--- /dev/null
+_______________________________________________________________________________
+2010-05-13 Release 5.836
+
+Gisle Aas (1):
+ Fix problem where $resp->base would downcase its return value
+
+
+
+_______________________________________________________________________________
+2010-05-05 Release 5.835
+
+Gisle Aas (12):
+ simple string can be simplified
+ Make $mess->decoded_content remove XML encoding declarations [RT#52572]
+ Don't allow saving to filenames starting with '.' suggested by server
+ Avoid race between testing for existence of output file and opening the file
+ Minor doc fixup -- wrongly ucfirsted word
+ Use decoded_content in HTTP:Response synopsis [RT#54139]
+ sun.com is no more. rip!
+ Trivial layout tweak to reduce variable scope.
+ Add 'make test_hudson' target
+ Implement alt_charset parameter for decoded_content()
+ Test decoding with different charset parameters
+ lwp-download now needs the -s option to honor the Content-Disposition header
+
+Ville Skyttä (9):
+ Make LWP::MediaTypes::media_suffix case insensitive.
+ Skip XML decoding tests if XML::Simple is not available.
+ Documentation fixes.
+ Fix m_media_type => "xhtml" matching.
+ Make parse_head() apply to data: requests.
+ Documentation spelling fixes.
+ Documentation grammar fixes.
+ Use $uri->secure in m_secure if available.
+ Fix handling of multiple (same) base headers, and parameters in them.
+
+Mark Stosberg (5):
+ Strip out empty lines separated by CRLF
+ Best Practice: avoid indirect object notation
+ Speed up as_string by 4% by having _sorted_field_names return a reference
+ Speed up scan() a bit. as_string() from this branch is now 6% faster
+ Port over as_string() optimizations from HTTP::Headers::Fast
+
+Tom Hukins (2):
+ Link to referenced documentation.
+ Update repository location.
+
+Father Chrysostomos (1):
+ Remove needless (and actually harmful) local $_
+
+Sean M. Burke (1):
+ "Perl & LWP" is available online
+
+
+
+_______________________________________________________________________________
+2009-11-21 Release 5.834
+
+Gisle Aas (4):
+ Check for sane default_headers argument [RT#50393]
+ Add $ua->local_address attribute [RT#40912]
+ Test that generation of boundary works [RT#49396]
+ Page does not display the "standard" apache listing any more
+
+Ville Skyttä (2):
+ Remove unneeded executable permissions.
+ Switch compression/decompression to use the IO::Compress/IO::Uncompress and
+ Compress::Raw::Zlib family of modules.
+
+Slaven Rezic (1):
+ lwp-request should use stderr for auth [RT#21620]
+
+
+
+_______________________________________________________________________________
+2009-10-06 Release 5.833
+
+
+Gisle Aas (5):
+ Deal with cookies that expire far into the future [RT#50147]
+ Deal with cookies that expire at or before epoch [RT#49467]
+ Pass separate type for https to LWP::ConnCache [RT#48899]
+ Improved handling of the User-Agent header [RT#48461]
+ HTTP::Cookies add_cookie_header previous Cookies [RT#46106]
+
+Andreas J. Koenig (1):
+ Improve diagnostics from LWP::UserAgent::mirror [RT#48869]
+
+Slaven Rezic (1):
+ mirror should die in case X-Died is set [RT#48236]
+
+Ville Skyttä (1):
+ Increase default Net::HTTP max line length to 8k.
+
+
+
+_______________________________________________________________________________
+2009-09-21 Release 5.832
+
+
+Ville Skyttä (6):
+ Fix net test suite.
+ Comment spelling fixes.
+ Fix links to old Netscape cookie specification.
+ Documentation spelling fixes.
+ Improve max line length exceeded/read error messages.
+ Do not warn about seemingly wellformed but unrecognized robots.txt lines.
+
+Gisle Aas (1):
+ $mess->content_charset would fail for empty content
+
+mschilli (1):
+ Further restrict what variables env_proxy() process
+
+
+
+_______________________________________________________________________________
+2009-08-13 Release 5.831
+
+
+Ville Skyttä (3):
+ Fix bzip2 content encoding/decoding.
+ send_te() doc grammar fix.
+ Document time2str() behavior with an undefined argument.
+
+Gisle Aas (1):
+ HTML::Message's content_charset trigger warnings from HTML::Parser [RT#48621]
+
+
+
+_______________________________________________________________________________
+2009-07-26 Release 5.830
+
+Gisle Aas (1):
+ select doesn't return undef on failure [RT#32356]
+
+Ville Skyttä (1):
+ Add raw RFC 1951 deflate test case.
+
+
+
+_______________________________________________________________________________
+2009-07-07 Release 5.829
+
+This release removes callback handlers that were left over on the returned
+HTTP::Responses. This was problematic because it created reference loops
+preventing the Perl garbage collector from releasing their memory. Another
+problem was that Storable by default would not serialize these objects any
+more.
+
+This release also adds support for locating HTML::Form inputs by id or class
+attribute; for instance $form->value("#foo", 42) will set the value on the
+input with the ID of "foo".
+
+
+Gisle Aas (5):
+ Make the example code 'use strict' clean by adding a my
+ Avoid cycle in response
+ Clean up handlers has from response after data processing is done
+ Support finding inputs by id or class in HTML::Form
+ Test HTML::Form selectors
+
+Mark Stosberg (1):
+ Tidy and document the internals of mirror() better [RT#23450]
+
+phrstbrn (1):
+ Avoid warnings from HTML::Form [RT#42654]
+
+
+
+_______________________________________________________________________________
+2009-06-25 Release 5.828
+
+A quick new release to restore compatiblity with perl-5.6.
+
+
+Gisle Aas (4):
+ Less noisy behaviour when we can't download the documents
+ Restore perl-5.6 compatiblity [RT#47054]
+ Don't decode US-ASCII and ISO-8859-1 content
+ Some versions of Encode don't support UTF-16-BE [RT#47152]
+
+Ville Skyttä (1):
+ Spelling fixes.
+
+
+
+_______________________________________________________________________________
+2009-06-15 Release 5.827
+
+The main news this time is better detection of what character set the document
+in a response uses and the addition of the lwp-dump script that I found useful.
+
+
+Gisle Aas (31):
+ Added lwp-dump script
+ Replace calls to $req->url with $req->uri
+ Also need to encode strings in the latin1 range
+ Ignore the value set for file inputs [RT#46911]
+ Add docs to lwp-dump
+ Don't let lwp-dump follow redirects
+ Support --method options
+ Implement the --agent option
+ Dictionary order for the option docs; document --method
+ Merge branch 'dump'
+ Files are passed as an array and we must not stringify it.
+ Add content_charset method to HTTP::Message
+ Start guessing the charset for a message
+ Let content_charset guess the charset to use for decoded_content
+ Specify what's missing for the XML and HTML case
+ Provide charset parameter for HTML::Form->parse()
+ Make content_charset sniff for <meta> elements specifying the charset.
+ Determine charset of XML documents
+ Get rid of the _trivial_http_get() implementation
+ Update the bundled media.types file
+ LWP::Simple::get() now returns decoded_content [RT#44435]
+ Implement content_type_charset method for HTTP::Headers
+ Implement content_is_text method for HTTP::Headers
+ Make use of content_is_text and content_type_charset in decoded_content
+ Don't let the parse_head callback append to the HTTP headers
+ Don't set Range header on request when max_size is used [RT#17208]
+ Still show client headers for internal responses
+ Document Client-Warning: Internal response
+ Don't use 'no' as example domain for no_proxy docs [RT#43728]
+ Drop exit from the Makefile.PL [RT#43060]
+ Merge branch 'content_charset'
+
+Alex Kapranoff (1):
+ Support "accept-charset" attribute in HTML::Form
+
+Mark Stosberg (1):
+ new tests for max_size and 206 responses [RT#46230]
+
+murphy (1):
+ Reformulation of Client-Warning: Internal documentation
+
+
+
+_______________________________________________________________________________
+2009-04-24 Release 5.826
+
+Gisle Aas (2):
+ Avoid returning stale Content-Type header after message parts have been updated
+ Don't let content saved to file be affected by the $\ setting
+
+Graeme Thompson (1):
+ Issues around multipart boundaries [RT#28970]
+
+Mike Schilli (1):
+ Ignore random _proxy env variables, allow only valid schemes
+
+Slaven Rezic (1):
+ README.SSL is not anymore available at the linpro.no URL.
+
+john9art (1):
+ Make LWP::UserAgent constructor honor the default_headers option [RT#16637]
+
+
+
+_______________________________________________________________________________
+2009-02-16 Release 5.825
+
+Zefram (1):
+ Fixup test failure with perl-5.8.8 and older; qr/$/m doesn't work
+
+
+
+_______________________________________________________________________________
+2009-02-13 Release 5.824
+
+Gisle Aas (7):
+ Make format_request() ensure that it returns bytes [RT#42396]
+ Force bytes in all the format_* methods.
+ Ignore Sitemap: lines in robots.txt [RT#42420]
+ Refactor; use variable to hold the test port
+ Add redirects method to HTTP::Message
+ Setting $ua->max_redirect(0) didn't work [RT#40260]
+ Convert files to UTF-8
+
+Zefram (2):
+ HTTP::Cookies destructor should not clobber $! and other globals.
+ Deal with the Encode module distributed with perl-5.8.0
+
+Ian Kilgore (1):
+ Avoid failure if 127.0.0.1:8333 is in use [RT#42866]
+
+Ville Skyttä (1):
+ Documentation improvements, spelling fixes.
+
+
+
+_______________________________________________________________________________
+2008-12-05 Release 5.823
+
+Gisle Aas (4):
+ Bring back the LWP::Debug code [RT#41759]
+ Add documentation section about 'Network traffic monitoring'.
+ Typo fixes
+ Want to ensure we get a single value back here.
+
+
+
+_______________________________________________________________________________
+2008-12-05 Release 5.822
+
+Gisle Aas (4):
+ A more modern user_agent example.
+ Make it possible to unset the proxy settings again
+ Prefer use specified Content-Length header [RT#41462]
+ Deprecate LWP::Debug
+
+
+
+_______________________________________________________________________________
+2008-11-25 Release 5.821
+
+Various bug fixes.
+
+
+Gisle Aas (3):
+ The Content-Length and Content-MD5 headers are no longer valid after encode/decode
+ Add META information
+ croak on bad proxy args [RT#39919]
+
+Slaven Rezic (1):
+ Skip a failing decoded_content on systems without Encode.pm [RT#40735]
+
+Steve Hay (1):
+ Skip LWP test when fork() is unimplemented
+
+Yuri Karaban (1):
+ redo used incorrectly in LWP::Protocol::http [RT#41116]
+
+jefflee (1):
+ HTTP::Cookies::Microsoft now handles user names with space [RT#40917]
+
+ruff (1):
+ Avoid aborting requests saved to file early [RT#40985]
+
+
+
+_______________________________________________________________________________
+2008-11-05 Release 5.820
+
+Main news is the ability to control the heuristics used to determine
+the expiry time for response objects.
+
+
+Gisle Aas (8):
+ Reformat later parts of Changes
+ Add a paragraph to summarize the motivation for releases since 5.815
+ all_pod_files_ok();
+ Fix POD markup error
+ Calculation of current_age with missing Client-Date.
+ The age/freshness methods now take an optional 'time' argument
+ More correct matching of 'max-age' in freshness_lifetime method
+ The freshness_lifetime method now support options to control its heuristics
+
+
+_______________________________________________________________________________
+2008-10-20 Release 5.819
+
+Release 5.815 to 5.818 had a severe bug that sometimes made LWP not
+collect all data for the responses it received. This release is
+strongly recommended as an upgrade for those releases.
+
+
+Gisle Aas (2):
+ Don't override $Net::HTTPS::SSL_SOCKET_CLASS if it's already set.
+ Wrong content handlers would sometimes be skipped [RT#40187]
+
+
+_______________________________________________________________________________
+2008-10-16 Release 5.818
+
+Main news in this release is the addition of the dump() method to the
+request and response objects. If found that I often ended up printing
+$resp->as_string for debugging and then regretting after the terminal
+got into a strange mode or just kept on scrolling for the longest
+time.
+
+
+Gisle Aas (8):
+ Use deflate compression instead of gzip for the test
+ Simplify; Get rid of the $USE_STORABLE_DCLONE configuration
+ Add dump method to HTTP::Message.
+ Use $res->dump instead of rolling our own.
+ Layout change; move headers() methods to a more logical place.
+ Add support for x-bzip2 encoding; fix bzip2 decoding.
+ Add send_header method to HTTP::Daemon
+ Make the lwp-request User-Agent string include the LWP version.
+
+Slaven Rezic (1):
+ clone implemented in terms of Storable::dclone [RT#39611]
+
+
+_______________________________________________________________________________
+2008-10-10 Release 5.817
+
+This is the release where I played around with Devel::NYTProf to
+figure where time was actually spent during the processing of requests
+with LWP. The methods that manipulated header objects stood out, so
+this release contains a few tweaks to make those parts faster.
+
+I also figured a few convenience methods to encode and decode the
+content of request/response objects would be in order.
+
+
+Gisle Aas (16):
+ Should store "wire" headers field names with _ without translation.
+ Test HTTP::Request->parse().
+ Restore pre-5.815 behaviour of returning "400 Bad Request" [RT#39694]
+ Rename the HTTP::Status constants to have HTTP_ prefix
+ Detection of unsupported schemes was broken [RT#37637]
+ Allow tainted methods to be forwarded to HTTP::Headers [RT#38736]
+ Add strict mode to HTML::Form
+ Fix test now that less warnings are generated.
+ Add content_is_xml method
+ Make split_header_words() lower case returned tokens/keys
+ Avoid invoking AUTOLOAD on object destruction [RT#39852]
+ Add decode() method to HTTP::Message
+ Add encode() method to HTTP::Message
+ Allow multiple fields to be set with push_header().
+ Make content_type and content_is_xhtml methods faster
+ Faster push_header()
+
+
+_______________________________________________________________________________
+2008-09-29 Release 5.816
+
+Oops, release 5.815 broke download-to-file on Windows.
+
+
+Gisle Aas (2):
+ Add missing binmode() [RT#39668]
+ Doc tweaks
+
+
+_______________________________________________________________________________
+2008-09-24 Release 5.815
+
+The main change this time is the introduction of handlers to drive the
+processing of requests in LWP::UserAgent. You can also register your
+own handlers for modifying and processing requests or responses on
+their way, which I think is a much more flexible approach that trying
+to subclass LWP::UserAgent to customize it. If we have had these
+early on then the LWP::UserAgent API could have been so much simpler
+as the effect of most current attributes can easily be set up with
+trivial handlers.
+
+Also thanks to contributions by Bron Gondwana LWP's Basic/Digest
+authenticate modules now registers handlers which allow them to
+automatically fill in the Authorization headers without first taking
+the round-trip of a 401 response when LWP knows the credentials for a
+given realm.
+
+
+Gisle Aas (23):
+ We don't need to build the docs to run the tests.
+ Style tweaks.
+ The jigsaw service isn't up to much good these days.
+ HTTP::Cookies produces warnings for undefined cookie param names [RT#38480]
+ Typo fix; HTTP::Message will never include x-bzip2 in Accept-Encoding [RT#38617]
+ Added HTTP::Config module
+ Add methods to configure processing handlers.
+ 100 Continue response not complete.
+ Use 3-arg open when response content to files.
+ Make the show_progress attribute official (by documenting it).
+ Start using handlers for driving the inner logic of LWP::UserAgent.
+ Expose the content_is_html and content_is_xhtml methods from HTTP::Headers.
+ Make credentials method able to get/set values.
+ An auth handler per realm.
+ Match proxy setting for request.
+ Set up separate handler for adding proxy authentication.
+ Add request_preprepare to be able to initialize proxy early enough.
+ Smarter get_my_handler() that can also create handlers.
+ Refactor; introduce run_handlers method
+ Pass in handler hash to the handler callback.
+ Don't let version=1 override behaviour if specified with a plan Set-Cookie header.
+ Remove handler when we don't have a username/password for the realm.
+ Make tests use Test.pm
+
+Bron Gondwana (2):
+ Double-check that username or password has changed after a failed login.
+ Update Digest Authen to subclass Basic.
+
+Ondrej Hanak (1):
+ Avoid running out of filehandles with DYNAMIC_FILE_UPLOAD.
+
+Todd Lipcon (1):
+ Fixed parsing of header values starting with ':' [RT#39367]
+
+amire80 (1):
+ Documentation typo fixes [RT#38203]
+
+
+_______________________________________________________________________________
+2008-07-25 Release 5.814
+
+Gisle Aas (13):
+ Typo fix.
+ Add HTTP::Message::decodable()
+ Use decoded_content in the synopsis
+ Avoid adding an empty first part in $mess->add_part()
+ Get rid of all the manual dependency tests.
+ Simplify the Makefile.PL (no interactivity)
+ Provide DELETE method in HTTP::Request::Common [RT#37481]
+ Checkbox picks up nearby text in description of alternates [RT#36771]
+ HTML::Form::possible_values() should not returned disabled values [RT#35248]
+ File::Listing documentation claimed only 'unix' format was supported [RT#22021]
+ File::Listing only support English locales [RT#28879]
+ Make common-req.t use Test.pm
+ Typo; CAN_TALK_TO_OUTSELF
+
+Bill Mann (1):
+ Fix up File::Listings fallback to dosftp [RT#23540]
+
+Hans-H. Froehlich (1):
+ File::Listing parse failure on BSD Linux based systems [RT#26724]
+
+
+_______________________________________________________________________________
+2008-06-17 Release 5.813
+
+Ville Skytta (3):
+ RobotUA constructor ignores delay, use_sleep [RT#35456]
+ Spelling fixes [RT#35457]
+ Add HTTP::Response->filename [RT#35458]
+
+Mark Stosberg (2):
+ Better diagnostics when the HTML::TokeParser constructor fails [RT#35607]
+ Multiple forms with same-named <select> parse wrongly [RT#35607]
+
+Gisle Aas (1):
+ Provide a progress method that does something that might be useful.
+
+Spiros Denaxas (1):
+ Documentation typo fix [RT#36132]
+
+
+_______________________________________________________________________________
+2008-04-16 Release 5.812
+
+Gisle Aas (6):
+ Typo fix.
+ Simplified Net::HTTP::Methods constructor call.
+ Croak if Net::HTTP constructor called with no argument.
+ Avoid calling $self->peerport to figure out what the port is.
+ 5.811 breaks SSL requests [RT#35090]
+ Make test suite compatible with perl-5.6.1.
+
+Toru Yamaguchi (1):
+ Wrong treatment of qop value in Digest Authentication [RT#35055]
+
+
+_______________________________________________________________________________
+2008-04-14 Release 5.811
+
+Gisle Aas (6):
+ Avoid "used only once" warning for $Config::Config.
+ Make HTTP::Request::Common::PUT set Content-Length header [RT#34772]
+ Added the add_content_utf8 method to HTTP::Message.
+ Typo fix.
+ Retry syscalls when they fail with EINTR or EAGAIN [RT#34093,32356]
+ Allow HTTP::Content content that can be downgraded to bytes.
+
+Gavin Peters (1):
+ HTML::Form does not recognise multiple select items with same name [RT#18993]
+
+Mark Stosberg (1):
+ Document how HTTP::Status codes correspond to the classification functions [RT#20819]
+
+Robert Stone (1):
+ Allow 100, 204, 304 responses to have content [RT#17907]
+
+sasao (1):
+ HTTP::Request::Common::POST suppressed filename="0" in Content-Disposition [RT#18887]
+
+
+_______________________________________________________________________________
+2008-04-08 Release 5.810
+
+Gisle Aas (10):
+ Small documentation issues [RT#31346]
+ Explain $netloc argument to $ua->credentials [RT#31969]
+ Make lwp-request honour option -b while dumping links [RT#31347]
+ Ignore params for date convenience methods [RT#30579]
+ Get rid of all the old CVS $Keyword:...$ templates. Set $VERSION to 5.810.
+ Update Copyright year.
+ Drop some sample URLs that were failing.
+ Complement the HTTP::Status codes [RT#29619]
+ Don't allow HTTP::Message content to be set to Unicode strings.
+ Refactor test for Encode.pm
+
+Ville Skytta (3):
+ Spelling fixes [RT#33272]
+ Trigger HTML::HeadParser for XHTML [RT#33271]
+ Escape status line in error_as_HTML, convert to lowercase [RT#33270]
+
+Alexey Tourbin (2):
+ Typo fix [RT#33843]
+ Protocol/file.pm: postpone load of URI::Escape and HTML::Entities [RT#33842]
+
+Daniel Hedlund (1):
+ HTML::Form Module and <button> element clicks
+
+Adam Kennedy (1):
+ HTTP::Cookies handle malformed empty Set-Cookie badly [RT#29401]
+
+Jacob J (1):
+ [HTTP::Request::Common] Does not handle filenames containing " [RT#30538]
+
+Rolf Grossmann (1):
+ Allow malformed chars in $mess->decoded_content [RT#17368]
+
+FWILES (1):
+ Croak if LWP::UserAgent is constructed with hash ref as argument [RT#28597]
+
+Adam Sjogren (1):
+ Disabled, checked radiobutton being submitted [RT#33512]
+
+DAVIDRW (1):
+ warn if TextInput's maxlength exceeded [RT#32239]
+
+
+_______________________________________________________________________________
+2007-08-05 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.808
+
+ Get rid of t/live/validator test. Too much JavaScript madness
+ for it to be a sane LWP test.
+
+
+
+2007-07-31 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.807
+
+ Apply patch correction from CPAN RT #26152
+
+ More laxed t/live/validator test.
+
+
+
+2007-07-19 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.806
+
+ Added progress callback to LWP::UserAgent.
+
+ HTTP::Daemon didn't avoid content in responses to HEAD requests
+
+ Add support for HTTP Expect header to HTTP::Daemon (CPAN RT #27933)
+
+ Fix t/base/message.t so tests are skipped if Encode is not
+ installed. (CPAN RT #25286)
+
+ Add HTML::Tagset as a prerequisite to Makefile.PL
+
+ Do not clobber $_ in LWP::Protocol::nntp (CPAN RT #25132)
+
+ Fix lwp-download so it can download files with an "_" in the filename
+ (CPAN RT#26207)
+
+ Quiet complaints from HTML::HeadParser when dealing with undecoded
+ UTF-8 data. (CPAN RT#20274)
+
+ When both IO::Socket::SSL and Net::SSL are loaded, use the latter
+ (CPAN RT #26152)
+
+ Allows SSL to work much more reliably:
+ (CPAN RT #23372)
+
+ Allow text/vnd.wap.wml and application/vnd.oasis.opendocument.text
+ in content-type field in lwp-request (CPAN RT #26151)
+
+ Add default media type for XML in LWP::MediaTypes (CPAN RT #21093)
+
+ Added chunked test by Andreas J. Koenig
+
+
+
+2005-12-08 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.805
+
+ HTTP::Date: The str2time function returned wrong values for
+ years in the early 20th century, because timelocal() actually
+ expects the year to be provided on a different scale than what
+ localtime() returns.
+
+ HTTP::Headers can now be constructed with field names that repeat.
+ The $h->header function now also accept repeating field
+ names and can also remove headers if passed undef as value.
+
+ HTML::Form: The parse method now takes hash style optional
+ arguments and the old verbose behaviour is now off by default.
+
+ HTML::Form: Accept <select multiple=""> for compatibility with
+ other browsers. Patch by Josh Rai <josh@rai.name>.
+
+ HTML::Form: Sane handling of 'disabled' for ListInput.
+ Based on patch by Joao Lopes <developer@st3tailor.com.br>.
+
+ HTTP::Negotiate: Fixed matching of partial language tags.
+ Patch contributed by Dan Kubb.
+
+ HTTP::Response: The as_string method now returns a status line
+ that doesn't add the "official" code name in the message
+ field. This improves the ability to round-trip response objects
+ via HTTP::Response->parse($res->as_string) and makes the first
+ line of the string returned agree with $res->status_line.
+
+ Net::HTTP: The host attribute can now be set undef in
+ order to suppress this header for HTTP/1.0 requests.
+
+ Net::HTTP: The default Host: header does not include the
+ port number if it is the default (80 for plain HTTP). Some
+ servers get confused by this.
+
+ Net::HTTP: Ignore bogus Content-Length headers. Don't get
+ confused by leading or trailing whitespace.
+
+ LWP::Protocol::http: More efficient sending of small PUT/POST
+ requests by trying harder to pass off the whole request in a
+ single call to syswrite.
+
+ lwp-request now give better error messages if you used the
+ -o option without having the HTML-Tree distribution installed.
+ Also document this dependency.
+
+
+
+2005-12-06 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.804
+
+ HTTP::Message->parse did not work when the first line of the body
+ was something that looked like a header.
+
+ HTTP::Header::Auth needs HTTP::Headers to be loaded before
+ it replace its functions.
+
+ LWP::Protocol::nntp improvements by Ville Skyttä <ville.skytta@iki.fi>:
+ - Support the nntp: scheme.
+ - Support hostname in news: and nntp: URIs.
+ - Close connection and preserve headers also in non-OK responses.
+ - HEAD support for URIs identifying a newsgroup.
+ - Comment spelling fixes.
+
+ Fix quotes in Net::HTTP example.
+ http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=283916
+
+ Detect EOF when expecting a chunk header. Should address the
+ warnings shown in http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=286775
+
+ WWW::RobotRules: Improved parsing of not strictly valid robots.txt files
+ contributed by <Matthew.van.Eerde@hbinc.com>.
+
+ Makefile.PL: Set name to LWP so that the .packlist ends up in the
+ expected place.
+
+
+
+2004-12-11 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.803
+
+ HTTP::Message: $mess->decoded_content sometimes had the side
+ effect of destroying $mess->content.
+
+ HTTP::Message: $mess->decoded_content failed for
+ "Content-Encoding: deflate" if the content was not in the
+ zlib-format as specified for the HTTP protocol. Microsoft got
+ this wrong, so we have to support raw deflate bytes as well.
+
+ HTTP::Response->parse don't require the protocol to be
+ specified any more. This allows HTTP::Response->parse($resp->as_string)
+ to round-trip. Patch by Harald Joerg <haj@oook.m.uunet.de>.
+
+ HTTP::Response: $resp->base might now return undef. Previously
+ it would croak if there was no associated request. Based on
+ patch by Harald Joerg <haj@oook.m.uunet.de>.
+
+ HTML::Form now support <label> for check- and radio boxes.
+ Patch contributed by Dan Kubb <dan.kubb@onautopilot.com>.
+
+ Make HTTP::Daemon subclassable, patch by Kees Cook <kees@osdl.org>.
+
+ lwp-download allow directory to save into to be specified.
+ Patch by Radoslaw Zielinski <radek@karnet.pl>.
+
+ lwp-download will validate filename derived from server
+ controlled data and will fail if something looks not
+ quite right.
+
+
+
+2004-11-30 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.802
+
+ The HTTP::Message object now has a decoded_content() method.
+ This will return the content after any Content-Encodings and
+ charsets have been decoded.
+
+ Compress::Zlib is now a prerequisite module.
+
+ HTTP::Request::Common: The POST() function created an invalid
+ Content-Type header for file uploads with no parameters.
+
+ Net::HTTP: Allow Transfer-Encoding with trailing whitespace.
+ <http://rt.cpan.org/Ticket/Display.html?id=3929>
+
+ Net::HTTP: Don't allow empty content to be treated as a valid
+ HTTP/0.9 response.
+ <http://rt.cpan.org/Ticket/Display.html?id=4581>
+ <http://rt.cpan.org/Ticket/Display.html?id=6883>
+
+ File::Protocol::file: Fixup directory links in HTML generated
+ for directories. Patch by Moshe Kaminsky <kaminsky@math.huji.ac.il>.
+
+ Makefile.PL will try to discover misconfigured systems that
+ can't talk to themselves and disable tests that depend on this.
+
+ Makefile.PL will now default to 'n' when asking about whether
+ to install the "GET", "HEAD", "POST" programs. There has been
+ too many name clashes with these common names.
+
+
+
+2004-11-12 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.801
+
+ HTTP::Message improved content/content_ref interaction. Fixes
+ DYNAMIC_FILE_UPLOAD and other uses of code content in requests.
+
+ HTML::Form:
+ - Handle clicking on nameless image.
+ - Don't let $form->click invoke a disabled submit button.
+
+ HTTP::Cookies could not handle a "old-style" cookie named
+ "Expires".
+
+ HTTP::Headers work-around for thread safety issue in perl <= 5.8.4.
+
+ HTTP::Request::Common improved documentation.
+
+ LWP::Protocol: Check that we can write to the file specified in
+ $ua->request(..., $file) or $ua->mirror.
+
+ LWP::UserAgent clone() dies if proxy was not set. Patch by
+ Andy Lester <andy@petdance.com>
+
+ HTTP::Methods now avoid "use of uninitialized"-warning when server
+ replies with incomplete status line.
+
+ lwp-download will now actually tell you why it aborts if it runs
+ out of disk space of fails to write some other way.
+
+ WWW::RobotRules: only display warning when running under 'perl -w'
+ and show which robots.txt file they correspond to. Based on
+ patch by Bill Moseley.
+
+ WWW::RobotRules: Don't empty cache when agent() is called if the
+ agent name does not change. Patch by Ville Skyttä <ville.skytta@iki.fi>.
+
+
+
+2004-06-16 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.800
+
+ HTML::Form will allow individual menu entries to be disabled.
+ This was needed to support <input type=radio disabled value=foo>
+ and <select><option disabled>foo.
+
+ HTML::Form now avoids name clashes between the <select> and
+ <option> attributes.
+
+ HTML::Form now implicitly close <select> elements when it sees
+ another input or </form>. This is closer to the MSIE behaviour.
+
+ HTML::Form will now "support" keygen-inputs. It will not
+ calculate a key by itself. The user will have to set its
+ value for it to be returned by the form.
+
+ HTTP::Headers now special case field names that start with a
+ ':'. This is used as an escape mechanism when you need the
+ header names to not go through canonicalization. It means
+ that you can force LWP to use a specific casing and even
+ underscores in header names. The ugly $TRANSLATE_UNDERSCORE
+ global has been undocumented as a result of this.
+
+ HTTP::Message will now allow an external 'content_ref'
+ to be set. This can for instance be used to let HTTP::Request
+ objects pick up content data from some scalar variable without
+ having to copy it.
+
+ HTTP::Request::Common. The individual parts will no longer
+ have a Content-Length header for file uploads. This improves
+ compatibility with "normal" browsers.
+
+ LWP::Simple doc patch for getprint.
+ Contributed by Yitzchak Scott-Thoennes <sthoenna@efn.org>.
+
+ LWP::UserAgent: New methods default_header() and
+ default_headers(). These can be used to set up headers that
+ are automatically added to requests as they are sent. This
+ can for instance be used to initialize various Accept headers.
+
+ Various typo fixes by Ville Skyttä <ville.skytta@iki.fi>.
+
+ Fixed test failure under perl-5.005.
+
+ LWP::Protocol::loopback: This is a new protocol handler that
+ works like the HTTP TRACE method, it will return the request
+ provided to it. This is sometimes useful for testing. It can
+ for instance be invoked by setting the 'http_proxy' environment
+ variable to 'loopback:'.
+
+
+
+2004-04-13 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.79
+
+ HTML::Form now exposes the 'readonly' and 'disabled'
+ attribute for inputs. This allows your program to simulate
+ JavaScript code that modifies these attributes.
+
+ RFC 2616 says that http: referer should not be sent with
+ https: requests. The lwp-rget program, the $req->referer method
+ and the redirect handling code now try to enforce this.
+ Patch by Ville Skyttä <ville.skytta@iki.fi>.
+
+ WWW::RobotRules now look for the string found in
+ robots.txt as a case insensitive substring from its own
+ User-Agent string, not the other way around.
+ Patch by Ville Skyttä <ville.skytta@iki.fi>.
+
+ HTTP::Headers: New method 'header_field_names' that
+ return a list of names as suggested by its name.
+
+ HTTP::Headers: $h->remove_content_headers will now
+ also remove the headers "Allow", "Expires" and
+ "Last-Modified". These are also part of the set
+ that RFC 2616 denote as Entity Header Fields.
+
+ HTTP::Headers: $h->content_type is now more careful
+ in removing embedded space in the returned value.
+ It also now returns all the parameters as the second
+ return value as documented.
+
+ HTTP::Headers: $h->header() now croaks. It used to
+ silently do nothing.
+
+ HTTP::Headers: Documentation tweaks. Documented a
+ few bugs discovered during testing.
+
+ Typo fixes to the documentation all over the place
+ by Ville Skyttä <ville.skytta@iki.fi>.
+
+ Updated tests.
+
+
+
+2004-04-07 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.78
+
+ Removed stray Data::Dump reference from test suite.
+
+ Added the parse(), clear(), parts() and add_part() methods to
+ HTTP::Message. The HTTP::MessageParts module of 5.77 is no more.
+
+ Added clear() and remove_content_headers() methods to
+ HTTP::Headers.
+
+ The as_string() method of HTTP::Message now appends a newline
+ if called without arguments and the non-empty content does
+ not end with a newline. This ensures better compatibility with
+ 5.76 and older versions of libwww-perl.
+
+ Use case insensitive lookup of hostname in $ua->credentials.
+ Patch by Andrew Pimlott <andrew@pimlott.net>.
+
+
+
+2004-04-06 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.77
+
+ LWP::Simple did not handle redirects properly when the "Location"
+ header used uncommon letter casing.
+ Patch by Ward Vandewege <ward@pong.be>.
+
+ LWP::UserAgent passed the wrong request to redirect_ok().
+ Patch by Ville Skyttä <ville.skytta@iki.fi>.
+ https://rt.cpan.org/Ticket/Display.html?id=5828
+
+ LWP did not handle URLs like http://www.example.com?foo=bar
+ properly.
+
+ LWP::RobotUA construct now accept key/value arguments in the
+ same way as LWP::UserAgent.
+ Based on patch by Andy Lester <andy@petdance.com>.
+
+ LWP::RobotUA did not parse robots.txt files that contained
+ "Disallow:" using uncommon letter casing.
+ Patch by Liam Quinn <liam@htmlhelp.com>.
+
+ WWW::RobotRules now allow leading space when parsing robots.txt
+ file as suggested by Craig Macdonald <craig@freeasphost.co.uk>.
+ We now also allow space before the colon.
+
+ WWW::RobotRules did not handle User-Agent names that use complex
+ version numbers. Patch by Liam Quinn <liam@htmlhelp.com>.
+
+ Case insensitive handling of hosts and domain names
+ in HTTP::Cookies.
+ https://rt.cpan.org/Ticket/Display.html?id=4530
+
+ The bundled media.types file now match video/quicktime
+ with the .mov extension, as suggested by Michel Koppelaar
+ <Michel.Koppelaar@kb.nl>.
+
+ Experimental support for composite messages, currently
+ implemented by the HTTP::MessageParts module. Based on
+ ideas from Joshua Hoblitt <jhoblitt@ifa.hawaii.edu>.
+
+ Fixed libscan in Makefile.PL.
+ Patch by Andy Lester <andy@petdance.com>.
+
+ The HTTP::Message constructor now accept a plain array reference
+ as its $headers argument.
+
+ The return value of the HTTP::Message as_string() method now
+ better conforms to the HTTP wire layout. No additional "\n"
+ are appended to the as_string value for HTTP::Request and
+ HTTP::Response. The HTTP::Request as_string now replace missing
+ method or URI with "-" instead of "[NO METHOD]" and "[NO URI]".
+ We don't want values with spaces in them, because it makes it
+ harder to parse.
+
+
+
+2003-11-21 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.76
+
+ Revised handling of redirects.
+ - clear our content and content headers if we
+ rewrite request as GET based on patch by
+ Steven Butler <stevenb@kjross.com.au>.
+ - pass $response to redirect_ok()
+
+ Support cpan:-URLs. Try 'lwp-download cpan:src/latest.tar.gz' :)
+
+ Fix test failure in 't/html/form.t' for perl5.005.
+
+
+
+2003-10-26 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.75
+
+ Reworked LWP::UserAgent, HTTP::Request and HTTP::Response
+ documentation. Also other documentation tweaks.
+
+
+
+2003-10-23 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.74
+
+ Improved lwp-download program:
+ - set mtime if Last-Modified header reported by server
+ - better prompts
+ - avoid warnings when aborted at the wrong time
+
+ Collected all contributions in the AUTHORS file and
+ also added an AUTHORS section to the LWP manpage.
+
+ Style tweaks to all modules. Move POD after __END__
+ and uncuddled elses.
+
+
+
+2003-10-19 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.73
+
+ Takanori Ugai <ugai@jp.fujitsu.com> found that 'max_redirect'
+ introduced in 5.72 was broken and provided a patch for that.
+
+ Not all ftp servers return 550 responses when trying to
+ to RETR a directory. Microsoft's IIS is one of those.
+ Patch provided by Thomas Lotterer <thl@dev.de.cw.com>.
+
+ Some documentation tweaks.
+
+
+
+2003-10-15 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.72
+
+ Requests for some non-HTTP URLs would fail if the cookie_jar
+ was enabled. The HTTP::Cookies::add_cookie_header now ignore
+ non-HTTP requests.
+
+ The new local/http test failed on Windows because of a missing
+ binmode().
+
+ Suppress Client-SSL-Warning warning header when Crypt::SSLeay
+ is able to verify the peer certificate. Patch contributed by
+ Joshua Chamas <joshua@chamas.com>.
+
+ HTTP::Request::Common::POST did not add a 'Content-Length' header
+ when the content ended up empty. Fixed by a patch contributed
+ by Brian J. Murrell.
+
+ Internally generated responses now contain a text/plain part
+ that repeats the status line. They also have a "Client-Warning"
+ header that can be used to differentiate these responses from
+ real server responses.
+
+ LWP::UserAgent now deals with 303 and 307 redirects. The behaviour
+ of 302 redirects has also changed to be like 303; i.e. change the
+ method to be "GET". This is what most browsers do. Based on
+ a patch contributed by Tom Hughes <thh@cyberscience.com>.
+
+ LWP::UserAgent now implements a 'max_redirect' attribute with a
+ default value of 7. This should also fix the problem where
+ redirects to the same URL to get a cookie set did not work.
+ Based on a patch by Sean M. Burke <sburke@cpan.org>.
+
+ NTLM authentication should continue to fail if the Authen::NTLM
+ module can't be loaded. LWP used to think the scheme was
+ available as soon as the module stash showed up. Not it looks
+ for the authenticate method to be defined. Patch by Jan Dubois.
+
+ lwp-download will not try to rename *.tbz and *.tar.bz2 to
+ match the reported content type. Patch contributed by
+ Robin Barker <Robin.Barker@npl.co.uk>.
+
+ HTTP::Cookies::Netscape documentation fix by Sean M. Burke.
+
+ HTTP::Cookies::Microsoft documentation fix by Johnny Lee.
+
+ The code that tries to avoid installing 'HEAD' on top of
+ 'head' on systems like Mac OS was further improved to look
+ in $Config{installscript} instead of $Config{sitebin}.
+ Patch provided by Ken Williams <ken@mathforum.org>.
+
+
+
+2003-10-14 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.71
+
+ Support open-ended Range: header for ftp requests.
+ Patch by Matthew Eldridge <eldridge@Graphics.Stanford.EDU>.
+
+ lwp-request now prints unsuccessful responses in the same way
+ as successful ones. The status will still indicate failures.
+ Based on a patch by Steve Hay <steve.hay@uk.radan.com>.
+
+ HTML::Form's dump now also print alternative value names.
+
+ HTML::Form will now pick up the phrase after a <input type=radio>
+ or <input type=checkbox> and use that as the name of the checked
+ value.
+
+ HTML::Form's find_input now returns all inputs that match in
+ array context. Based on patch by Mark Stosberg <markstos@cpan.org>
+ in <http://rt.cpan.org/Ticket/Display.html?id=3320>.
+
+ HTTP::Daemon's send_file() method did not work when given
+ a file name. Patch by Dave W. Smith <dws@postcognitive.com>.
+
+ HTTP::Daemon is less strict about field names in the request
+ headers is received. The Norton Internet Security firewall
+ apparently likes to send requests with a header called
+ '~~~~~~~~~~~~~~~'. Further details in
+ <http://rt.cpan.org/Ticket/Display.html?id=2531>.
+
+ LWP::Protocol::http assumed $1 would be meaningful without
+ testing the outcome of the match. This sometimes produced
+ an extra garbage Authentication header.
+ Based on the patch by <bai@dreamarts.co.jp> in
+ <http://rt.cpan.org/Ticket/Display.html?id=1994>.
+
+ LWP::Protocol::mailto will try harder to locate the sendmail
+ program as suggested in <http://rt.cpan.org/Ticket/Display.html?id=2363>.
+ Also let $ENV{SENDMAIL} override the search.
+
+ Patch to enable OS/2 build by Ilya Zakharevich.
+
+
+
+2003-10-13 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.70
+
+ File::Listing::apache by Slaven Rezic <slaven@rezic.de>
+
+ HEAD requests now work properly for ftp: URLs.
+ Patch by Ville Skyttä <ville.skytta@iki.fi>.
+
+ LWP::UserAgent: The protocols_allowed() and protocols_forbidden()
+ methods are now case insensitive. Patch by Ville Skyttä
+ <ville.skytta@iki.fi>.
+
+ Avoid warning from HTTP::Date on certain invalid dates.
+ Patch by David Dick <david_dick@iprimus.com.au>.
+
+ HTML::Form::param() is an alternative interface for inspecting
+ and modifying the form values. It resembles the interface
+ of CGI.
+
+ HTML::Form documentation updated. Lots of typo fixes and improves
+ by Martin Thurn <mthurn@northropgrumman.com>.
+
+ HTML::Form will treat any unknown input types as text input.
+ This appears to be what most browsers do.
+
+ HTML::Form::parse() can now take a HTTP::Response object
+ as argument.
+
+ The "checkbox" and "option" inputs of HTML::Form can now be
+ turned on with the new check() method.
+
+ The inputs of HTML::Form can now track alternative value
+ names and allow values to be set by these names as well.
+ Currently this is only supported for "option" inputs.
+
+ HTML::Form's dump() method now print the name of the form if
+ present.
+
+
+
+2003-01-24 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.69
+
+ Include lwptut.pod contributed by Sean M. Burke C<sburke@cpan.org>.
+
+ The lwp-request aliases GET, HEAD, POST where installed when
+ no program should be. Fixed by David Miller <dave@justdave.net>.
+
+ lwp-rget --help don't print double usage any more.
+
+ HTTP::Header::Util is now more reluctant to put quotes around
+ token values.
+
+ Net::HTTP: Avoid warning on unexpected EOF when reading chunk
+ header.
+
+
+
+2003-01-02 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.68
+
+ Fix test failure for t/html/form.t when running under
+ perl-5.8.0.
+
+
+
+2003-01-01 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.67
+
+ HTTP::Cookies::Microsoft contributed by Johnny Lee <typo_pl@hotmail.com>.
+ This module makes it possible for LWP to share MSIE's cookies.
+
+ HTML::Form supports file upload better now. There are some
+ new methods on that kind of input; file(), filename(), content()
+ and headers().
+
+ Removed unfinished test that depended on Data::Dump.
+
+ Net::HTTP avoids exceptions in read_response_headers() with
+ laxed option. It now always assumes HTTP/0.9 on unexpected
+ responses.
+
+ HTML::Form documentation reworked.
+
+
+
+2002-12-20 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.66
+
+ Various patches from Sean M. Burke. Most of them to
+ match up LWP with the "Perl & LWP" book.
+
+ LWP::DebugFile module contributed by Sean.
+
+ LWP::Authen::Ntml contributed by James Tillman.
+
+ HTTP::Daemon patch for Alpha by <shildreth@emsphone.com>
+
+ The format_chunk() and write_chunk() methods of Net::HTTP
+ did not work. Bug spotted by Yale Huang <yale@sdf-eu.org>.
+
+ The Client-Peer response header is back.
+
+
+
+2002-05-31 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.65
+
+ Make HTTP::Date compatible with perl 5.8.
+
+ Try to avoid to default to overwriting /usr/bin/head
+ on MacOS X when the perl install prefix is /usr/bin.
+
+ HTTP::Cookies fix for parsing of Netscape cookies file
+ on MS Windows. Patch by by Sean M. Burke <sburke@cpan.org>.
+
+ HTTP::Negotiate doc patch from Edward Avis <epa98@doc.ic.ac.uk>.
+
+
+
+2002-02-09 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.64
+
+ Simplified the Makefile.PL:
+ - the scripts are not longer *.PL files
+
+ - don't try to make symlinks for GET, HEAD, POST
+ as that has not worked for a long time
+
+ - the GET, HEAD, POST aliases for lwp-request should
+ now work on Windows.
+
+ HTTP::Cookies:
+ - added 'clear_temporary_cookies' method;
+ patch by Mike Schilli <schilli1@pacbell.net>.
+
+ - trailing space in old cookie parameters not ignored;
+ patch by Ivan Panchenko
+
+ - protect against $SIG{__DIE__} handlers;
+ patch by Adam Newby <adam@NewsNow.co.uk>.
+
+ LWP::Authen::Digest:
+ - abort digest auth session if we fail repeatedly with
+ the same username/password.
+
+ MacOS portability patches to the test suite by
+ Chris Nandor <pudge@pobox.com>.
+
+
+
+2001-12-14 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.63
+
+ HTTP::Negotiate: Deal with parameter names in a case
+ insensitive way. Put a little weight on the order of features
+ in the Accept headers.
+
+ LWP::UserAgent: make sure that the 'request' attribute is
+ always set on the returned response.
+
+ LWP::Protocol::http will now allow certain bad headers
+ in the responses it reads. The bad headers end up in the
+ header 'Client-Junk'.
+
+ Net::HTTP new options to the 'read_response_headers'
+ method. The option 'laxed' will make it ignore bad header
+ lines. The option 'junk_out' can be used to pass in an
+ array reference. Junk header lines are pushed onto it.
+
+ Net::HTTP::Methods: fixed the internal zlib_ok() to also
+ return the correct value the first time.
+
+ LWP::Protocol::http: Ensure that we don't read until
+ select has said it is ok since we have put the socket
+ in non-blocking mode. Previously this could happen if
+ you set the 'timeout' attribute of the user agent to 0.
+
+ LWP::Authen::Digest now use Digest::MD5 instead of MD5.
+
+ Some adjustments to Makefile.PL to figure out if
+ Compress::Zlib is available and adjust the test suite
+ accordingly.
+
+
+
+2001-11-21 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.62
+
+ The $VERSION of LWP::UserAgent wrapped around. This confused the
+ CPAN indexer. Bumped the major number to 2 to fix this.
+
+ Net::HTTP did not work well on perl5.003. The PeerHost argument
+ to IO::Socket::INET was not recognized, so we had to use PeerAddr
+ instead. The syswrite() method also required a length argument.
+
+ Net::HTTP did not deal with transfer encoding tokens in a
+ case-insensitive way. Patch by Blair Zajac <blair@orcaware.com>.
+
+ The jigsaw-chunk test failed on MacOS because "\n" is different.
+ Patch by Chris Nandor <pudge@pobox.com>.
+
+
+
+2001-11-16 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.61
+
+ LWP::Protocol::http did not invoke its _fixup_header method.
+ The result was that the 'Host' header got completely wrong
+ when talking through a proxy server.
+
+ The live Google test is gone. They blocked us.
+
+ The guts of Net::HTTP has moved to Net::HTTP::Methods.
+
+ Net::HTTP now has limits on the size of the header which are
+ set by default.
+
+ New module Net::HTTPS.
+
+ Documentation tweaks.
+
+ HTTP::Headers: The 'remove_header' method now return the values
+ of the fields removed as suggested by Blair Zajac <blair@orcaware.com>.
+ Also a typo fix by Blair.
+
+ HTTP::Message: The delegation via &AUTOLOAD should be slightly
+ faster now. It will install a real forwarding function the
+ first time it is called for each HTTP::Headers method.
+
+ LWP::UserAgent: Don't forward 'Cookie' headers on redirect.
+ Patch by Steve A Fink <steve@fink.com>.
+
+ LWP::Protocol::http has been reorganized to make it simpler
+ to subclass it. Other minor changes to it include:
+ - Client-Warning is gone
+ - Client-Request-Num renamed to Client-Response-Num
+ - A 'Transfer-Encoding' header is rewritten into a
+ 'Client-Transfer-Encoding' header.
+
+ LWP::Protocol::https is completely redone.
+
+
+
+2001-10-26 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.60
+
+ Made HTTP/1.1 the default. The old HTTP/1.0 module has been
+ renamed as LWP::Protocol::http10. There is an environment
+ variable; PERL_LWP_USE_HTTP_10 that can be set to have LWP
+ still pick up the old drivers.
+
+ Deal with "100 continue" responses even when not requested by
+ and Expect header in the request. MS IIS seems to eager to send
+ this kind of response.
+
+ For HTTP/1.1 over SSL there was a problem with the underlying
+ SSL libraries if the socket was configured to non-blocking mode.
+ Disable this for https.
+ Based on a patch from Michael Thompson <mickey@berkeley.innomedia.com>
+
+ Support the Range header for ftp:// requests.
+ Patch by David Coppit <david@coppit.org>.
+
+ Rearrange Bundle::LWP on request from Chris Nandor.
+
+ HTTP::Cookies: Allow a domain like .foo.com match host "foo.com".
+ Patch by Alexandre Duret-Lutz <duret_g@lrde.epita.fr>
+
+ For redirects make sure Host header is not copied to the new
+ request.
+
+ The HTML::HeadParser is not loaded until actually needed.
+
+ Net::HTTP should now work with perl5.005 by a simple tweak
+ to 'require IO::Socket::INET'.
+
+ WWW::RobotRules::AnyDBM: Explicitly clear database on open.
+ Some DBM implementations doesn't support the O_TRUNC flag
+ properly. Patch by Radu Greab <radu@netsoft.ro>.
+
+
+
+2001-09-19 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.53_97
+
+ LWP::Protocol::http11: fix socket leak. Because we managed
+ to set up a circular reference within the sockets objects they
+ stayed around forever.
+
+ LWP::UserAgent: Split up simple_request into prepare_request
+ and send_request. Patch contributed by Keiichiro Nagano <knagano@sodan.org>
+
+ LWP::Protocol::http: Pass all header data to LWP::Debug::conns.
+ Based on patch by Martijn.
+
+ LWP::UserAgent: Sean fixed a Cut&Paste error.
+
+ HTTP::Cookies: avoid pack("c",...) warning from bleadperl.
+
+
+
+2001-08-27 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.53_96
+
+ HTTP/1.1 support also for https.
+ Contributed by Doug MacEachern <dougm@covalent.net>
+
+ The HTTP/1.1 modules are now enabled by default. Hope that will give
+ them more testing than they otherwise would have gotten.
+
+ HTTP::Daemon's accept now has same behaviour as IO::Socket's
+ accept in list context. Fixed by Blair Zajac <blair@gps.caltech.edu>.
+
+ More argument sanity checking in HTTP::Request->uri and
+ LWP::UserAgent->simple_request. Patch by Sean M. Burke.
+
+ HTTP::Protocol::http. Deal with short writes.
+ Patch by Norton Allen <allen@huarp.harvard.edu>
+
+ HTTP::Protocol::http11: Deal with newlines in header values.
+
+ Net::HTTP: call sysread (instead of xread) when more data is required.
+
+
+
+2001-08-06 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.53_95
+
+ Fix HTTP::Cookies where there is a mix of Set-Cookie and
+ Set-Cookie2 headers. In that case we used to ignore all Set-Cookie
+ headers. Now we only ignore those Set-Cookie headers that reference
+ the same cookie as a Set-Cookie2 header.
+
+ HTTP::Request, HTTP::Response will by default now use "URI" class,
+ instead of "URI::URL", when constructing its URI objects. This
+ has a potential for breaking existing code as URI::URL objects had
+ some extra methods that external code might depend upon.
+
+ Patches by Sean M. Burke:
+ - Fix treatment of language tags in HTTP::Negotiate
+ - Avoid trailing newline in $response->message
+ - HTTP::Response clarifications
+
+ LWP::Simple deals with non-absolute redirects "correctly" now.
+
+ Net::HTTP does not try to load Compress::Zlib until it is needed.
+
+ Net::HTTP documentation has been updated.
+
+
+
+2001-05-05 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.53_94
+
+ Sean M. Burke's update to LWP::UserAgent:
+ - updated redirect_ok behaviour
+ - new convenience methods: get/head/post/put
+ - protocols_allowed/protocols_forbidden
+ - LWP::Protocol::nogo (new module)
+
+ Added digest auth test against Jigsaw
+
+ Fixed a 'use of uninitialized'-warning in the handling of
+ digest authentication.
+
+ Net::HTTP updates:
+ - new option: SendTE
+ - support transfer-encoding 'deflate' and 'gzip' (when Compress::Zlib
+ is available).
+ - new methods: format_chunk, format_chunk_eof
+ - use -1 (instead of "0E0" as signal that no data was available,
+ but this was not EOF).
+
+
+
+2001-04-28 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.53_93
+
+ Makefile.PL now asks some questions
+
+ Added live tests for the new HTTP/1.1 support
+
+ LWP::MemberMixin: make it possible to set a value to the 'undef' value.
+
+ Net::HTTP:
+ - transparent support for 'deflate' and 'gzip' transfer encodings
+ (need to have the Compress::Zlib module installed for this to work).
+
+
+
+2001-04-25 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.53_92
+
+ LWP::Protocol::ftp now support keep-alives too. The command
+ connection will stay open if keep-alives are enabled.
+
+ LWP::Protocol::http11 various fixes:
+ - chunked request content did not work
+ - monitor connection while sending request content
+ - deal with Expect: 100-continue
+
+ LWP::RobotUA: Protect host_port call. Not all URIs have this method.
+
+
+
+2001-04-20 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.53_91
+
+ Introduced LWP::ConnCache module. Works similar to HTTP::Cookies,
+ it that it takes effect if associated with the $ua.
+
+ The handling of $ua->max_size changed to make 0 mean 0
+ (not unlimited). An value of undef means no limit.
+ The X-Content-Base header is gone. I hope nobody relies on
+ it. It might come back if people start to scream. There
+ is a new Client-Aborted header instead.
+
+ The Range header generated for $ua->max_size had a off-by-one
+ error. A range of "0-1" means 2 bytes.
+
+ The LWP::UserAgent constructor now takes configuration arguments.
+
+ Keep-alive and the new HTTP/1.1 module can now be simply
+ enabled with something like:
+
+ LWP::UserAgent->new(keep_alive => 1);
+
+ New method $ua->conn_cache to set up and access the associated
+ connection manager.
+
+ If the string passed to $ua->agent() ends with space then
+ the "libwww-perl/#.##" string is automatically appended.
+
+ New method $ua->_agent
+
+ Passing a plain hash to $ua->cookie_jar automatically loads
+ HTTP::Cookies and initialise an object using the hash content
+ as constructor arguments.
+
+ LWP::Protocol::http11 now use the conn_cache of the $ua.
+
+ LWP::Protocol::http11 now added a few new Client- headers.
+
+ LWP::Protocol avoid keeping the connection alive if $ua->max_size
+ limit prevents the whole body content from being read.
+
+ Net::HTTP updates:
+ - new methods: write_chunk(), write_chunk_eof()
+ - reset state properly when a new body is read.
+ - always set read buffer empty on eof
+ - doc update
+
+ WWW::RobotRules patch by Liam Quinn <liam@htmlhelp.com>:
+ - Always qualify netloc with port.
+ - Reverse User-Agent substring matching.
+
+
+
+2001-04-18 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.53_90
+
+ Note: This is a developer only release. Not for production use.
+
+ LWP::Protocol::http11 now does keep-alives by default. Still need
+ to figure out what interface to provide at the $ua level.
+
+ LWP::Protocol::http11 deals with CODE content in request.
+
+ Net::HTTP updated:
+ - added format_request() method
+ - added _rbuf and _rbuf_length methods
+ - read_response_headers does not return protocol version
+ any more.
+ - peer_http_version method did not work because of typo.
+ - documentation added
+
+ New module Net::HTTP::NB. This is a Net::HTTP subclass that
+ is better suited for multiplexing as it is able to do no-blocking
+ reads of headers and entity body chunks.
+
+ HTTP::Request: Protect $request->uri against evil $SIG{__DIE__} handlers.
+
+ Some reorganisation in how stuff is passed from $ua to protocol object.
+ The $ua is now passed in so protocol objects might store start in it.
+
+ The $ua->max_size default is now 0.
+
+ The $ua->clone method avoids sharing of proxy settings between
+ the old and the new.
+
+ This file is renamed to 'Changes' (used to be 'ChangeLog').
+
+
+
+2001-04-10 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.53
+
+ LWP::Simple::get() could sometimes return nothing on failure in
+ list context. Now it always returns 'undef'.
+
+ HTTP::Cookies does not request 2 dots on domain names any more.
+ New option to hide the Cookie2 header. Cookie2 header now quote
+ the version number. Updated reference to RFC 2965.
+
+ Support for embedded userinfo in http proxy URIs. It means that
+ you know can set up your proxy with things like:
+ http_proxy="http://proxyuser:proxypass@proxyhost:port"
+ Patch by John Klar <j.klar@xpedite.com>.
+
+ Experimental HTTP/1.1 support. New module called Net::HTTP that
+ provide the lower level interface and a LWP::Protocol::http11
+ module that builds on it. The HTTP/1.1 protocol module must be
+ loaded and registered explicitly, otherwise the old and trustworthy
+ HTTP/1.0 module will be used.
+
+ LWP::Protocol::GHTTP will try to use the get_headers() methods
+ so that it can actually extract all the headers.
+
+
+
+2001-03-29 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.52
+
+ HTTP::Header: new method $h->init_header() that will only
+ set the header if it is not already set. Some shuffling
+ around in the code.
+
+ LWP::UserAgent will not override 'User-Agent', 'From'
+ or 'Range' headers if they are explicitly set in the
+ request passed in.
+
+ HTML::Form tries to optimize parsing be restricting the
+ tags that are reported by HTML::Parser. Will need
+ HTML::Parser v3.19_93 or better for this to actually
+ have any effect.
+
+ LWP::Protocol::ftp now deals with path parameters again.
+ It means that you can append ";type=a" to ftp-URI and
+ it will download the document in ASCII mode.
+
+ If the server output multiple Location headers on a redirect,
+ ignore all but the first one.
+
+ Extract cookies failed on request URIs with empty paths.
+ This was only triggered if you used URI objects directly in
+ scripts.
+
+ This change was actually part of 5.51: Fix qop="auth"
+ handling for Digest authentication.
+ Patch by Dave Dunkin <dave_dunkin@hotmail.com>.
+
+
+
+2001-03-14 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.51
+
+ SECURITY FIX: If LWP::UserAgent::env_proxy is called in a CGI
+ environment, the case-insensitivity when looking for "http_proxy"
+ permits "HTTP_PROXY" to be found, but this can be trivially set by the
+ web client using the "Proxy:" header. The fix applied is that
+ $ENV{HTTP_PROXY} is not longer honored for CGI scripts.
+ The CGI_HTTP_PROXY environment variable can be used instead.
+ Problem reported by Randal L. Schwartz.
+
+ NOTE: It is recommended that everybody that use LWP::UserAgent
+ (including LWP::Simple) in CGI scripts upgrade to this release.
+
+ Explicit setting of action on HTML::Form had no effect because
+ of a code typo. Patch by BooK <book@netcourrier.com>.
+
+ HTTP::Daemon: The CONNECT method need special treatment because
+ it does not provide a URI as argument (just a "hostname:port").
+ The non-upward compatibility warning is that you must now call
+ $request->url->host_port to get the host/port string for CONNECT,
+ rather than calling $request->url and using the entire string.
+ Based on patch from Randal L. Schwartz <merlyn@stonehenge.com>
+
+ HTTP::Daemon: Create self URL based on $self->sockaddr. This works
+ better when LocalAddr is used to specify the port number. Based on
+ patch from Ben Low <ben@snrc.uow.edu.au>.
+
+ Avoid potential '<FILE> chunk 1' messages at the end of the response
+ 'message'.
+
+
+
+2001-01-12 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.50
+
+ Fix for test cases that failed because of URI-1.10 now encode
+ space as '+' instead of '%20. Patch by Christian Gilmore
+ <cgilmore@tivoli.com>.
+
+ Makefile.PL: Require URI-1.10.
+
+ HTTP::Daemon now accepts any non-space character as method name
+ on the request line. It used to fail on methods like "M-POST"
+ because it only allowed \w-chars.
+
+ HTTP::Date now allow fractional seconds in ISO date formats.
+ Based on patch from Mark D. Anderson <mda@discerning.com>
+
+ HTTP::Request::Common will now calculate Content-length
+ even if $DYNAMIC_FILE_UPLOAD is set. Patch provided by
+ Lindley, Lee T <Lee.Lindley@viasystems.com>.
+
+
+
+2000-12-31 Gisle Aas <gisle@ActiveState.com>
+
+ Release 5.49
+
+ HTML::Form: Use croak() instead of die. Implement
+ $f->possible_values. Avoid use of undef value warnings.
+
+ HTTP::Cookies: fix epath issue. Make it work for URI::http
+ as the uri-attribute of HTTP::Request object
+
+ HTTP::Date: Allow ignored timezone info in parenthesis. Patch
+ by Sander van Zoest <sander@covalent.net>.
+ Fix calculation of non-GMT timezones (wrong sign). Patch by
+ KONISHI Katsuhiro <konishi@din.or.jp>.
+
+ HTTP::Response: Let $res->base() absolutize the URI. Based on
+ bug report from Hans de Graaff <hans@degraaff.org>.
+
+ Fixed minor doc typos in HTTP::Headers::Util and LWP::UserAgent.
+
+ HTTP::Request::Common: Support boundary spec from client.
+
+ LWP::Simple: Avoid trivial_http_get when @ appears in authority
+ part of URI
+
+ LWP::Authen::Digest: Need to have query in URI param.
+ Spotted by <ronald@innovation.ch>.
+
+ LWP::Protocol::http: unescape username/password if they are
+ specified in the URI.
+
+ Added LWP::Protocol::GHTTP. This allow LWP to use the HTTP::GHTTP
+ module as the low level HTTP driver.
+
+
+
+2000-04-09 Gisle Aas <gisle@aas.no>
+
+ Release 5.48
+
+ README.SSL update by Marko Asplund <aspa@hip.fi>
+
+ Added cookie example to lwpcook.pod
+
+ HTTP::Date::str2time returns undef on failure instead
+ of an empty list as suggested by Markus B Krüger <markusk@pvv.org>
+
+ $request->uri($uri) will now always store a copy of the $uri.
+
+ HTTP::Status: Added status codes from RFC 2616 and RFC 2518 (WebDAV)
+
+ LWP::RobotUA will not parse robots.txt unless content type and
+ content sample looks right.
+
+ LWP::UserAgent: Deal with multiple WWW-Authenticate headers.
+ Patch by Hugo <hv@crypt.compulink.co.uk>
+
+ $ua->proxy can now return the old proxy settings without
+ destroying the old one.
+ Based on patch by Benjamin Low <ben@snrc.uow.edu.au>
+
+ LWP::Protocol::http update
+
+ - don't terminate header parsing on bad headers
+ - extra_sock_opts
+ - preparations for keep alive support
+ - method CONNECT
+
+ WWW::RobotRules deal with various absolute URIs in the
+ disallow lines.
+
+ Makefile.PL: Make sure we have HTML::TokeParser
+
+ Clean test on VMS.
+ Patch by Charles Lane <lane@ DUPHY4.Physics.Drexel.Edu>.
+
+
+
+1999-11-16 Gisle Aas <gisle@aas.no>
+
+ o Release 5.47
+
+ o Added HTML::Form to the distribution.
+
+ o LWP::Protocol::ftp: Make it URI.pm compatible. We broke it in 5.45.
+
+ o LWP::Protocol::http: Kill any Connection header
+
+ o LWP::MediaTypes: Fixed builtin html/text mapping.
+ Added bz2 to suffixEncoding
+
+
+
+1999-10-28 Gisle Aas <gisle@aas.no>
+
+ o Release 5.46
+
+ o Updated mailing list address
+
+ o Avoid warnings for lwp-request -t
+
+ o referrer as alternative spelling for referer as suggested by tchrist.
+
+ o More conservative selection of boundary for multipart messages
+ in &HTTP::Request::Common::POST.
+
+ o LWP::MediaTypes can now export &read_media_types.
+
+ o Spelling corrections from Ben Tilly <Ben_Tilly@trepp.com>
+
+
+
+1999-09-20 Gisle Aas <gisle@aas.no>
+
+ o Release 5.45
+
+ o The LWP SSL support better explained. Documentation in README.SSL
+ and lwpcook.pod contributed by Marko Asplund <aspa@hip.fi>.
+
+ o LWP::Protocol::https: Try loading IO::Socket::SSL if Net::SSL is
+ not available.
+
+ o lwp-mirror -t option did not work.
+
+ o defined(@ISA) eliminated. Patch by Nathan Torkington <gnat@frii.com>
+
+ o LWP::Protocol::ftp: Protect against empty path_segments
+
+
+
+1999-06-25 Gisle Aas <gisle@aas.no>
+
+ o Release 5.44
+
+ o We require URI-1.03, since this fix some query quoting stuff
+ that HTTP::Request::Common rely upon.
+
+ o 'lwp-request -HUser-Agent:foo' can now be used to set this
+ header too.
+
+ o Localize $/ to ensure standard record separator a few places
+ in HTTP::Cookies
+
+ o LWP::UserAgent will now set the Range header in requests if
+ the $ua->max_size attribute is set.
+
+
+
+1999-05-09 Gisle Aas <gisle@aas.no>
+
+ o Release 5.43
+
+ o New lwp-request command line option that allow you to put any
+ header into the request (-H).
+
+ o New HTTP::Date because of Y2K-problems with the old one.
+ It refused to parse the ftp-listing (ls -l) dates missing year.
+ Additional entry point is parse_date(). This function avoid any
+ limitations caused by the time-representation (seconds since
+ epoch).
+
+ o Y2K fix to t/base/cookies.t. Netscape's original cookie
+ example expires at 09-Nov-99.
+
+ o Added another binmode() to LWP::Protocol::file as suggested
+ by Matt Sergeant <matt-news@sergeant.org>
+
+
+
+1999-03-20 Gisle Aas <gisle@aas.no>
+
+ o Release 5.42
+
+ o MacOS patches from Paul J. Schinder <schinder@leprss.gsfc.nasa.gov>
+
+ o Documentation patch from Michael A. Chase <mchase@ix.netcom.com>
+
+ o PREREQ_PM patch from Andreas Koenig <andreas.koenig@anima.de>
+
+ o LWP::Simple::head fix by Richard Chen <richard@lexitech.com>
+
+ o "LWP fails with PerlEXE"-patch from Gurusamy Sarathy
+
+ o Allow "." in HTTP header names. Patch by Marc Langheinrich
+ <marc@ccm.cl.nec.co.jp>
+
+ o Fixed reference to $uri->netloc in lwp-request
+
+ o Cute animation in lwp-download
+
+
+
+Mon Nov 19 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.41
+
+ o HTTP::Cookies provide better Netscape cookies compliance.
+ Send back cookies to any port, and allow origin host name to
+ be specified as domain, and still be treated as a domain.
+ Patch from Andreas Gustafsson <gson@araneus.fi>.
+
+ o HTTP::Cookies now ignore the Expires field in Set-Cookie, if the
+ date provided can't be parsed by HTTP::Date.
+
+ o HTTP::Daemon will lowercase the hostname returned from
+ Sys::Hostname::hostname(). This avoid some test failures in
+ the test suite for people with upper- or mixed-cased hostnames.
+
+ o LWP::Protocol::gopher: IO::Socket::INET ctor did not specify
+ Proto => 'tcp'. This made it less portable to older IO::Socket
+ versions.
+
+ o No programs installed when you build the Makefile with
+ 'perl Makefile.PL LIB=/my/lib'
+
+ o LWP bundle mention Digest::MD5 instead of MD5
+
+ o headers-auth.t test suite bug triggered by perl5.005_5x.
+ Patch by Albert Dvornik <bert@genscan.com>
+
+ o The local/http.t test actually did try to unlink("."). This was
+ very confusing on systems where it succeed.
+
+
+
+Mon Oct 12 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.40_01
+
+ o Unbundled URI::URL modules. You now have to install the
+ URI.pm module in order to get libwww-perl working.
+
+ o Made library URI.pm compatible. Made all URI object instantiations
+ based on $HTTP::URI_CLASS variable.
+
+ o New lwp-rget option: --referer.
+ Patch by INOUE Yoshinari <inoue@kusm.kyoto-u.ac.jp>.
+
+ o One more binmode() to HTTP::Daemon as suggested by
+ Markus Laker <mlaker@contax.co.uk>.
+
+
+
+Tue Aug 4 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.36
+
+ o The lwp-download script will call $ua->env_proxy now.
+
+ o The lwp-request script allows content types (specified with the -c
+ option) with optional parameters like: multipart/mixed; boundary="--".
+
+ o LWP::UserAgent will lowercase all authentication parameter names
+ before passing it to the authentication module. Previous releases
+ ignored parameters like; Realm="Foo" (because Realm contained
+ upper case letters).
+
+ o LWP::Protocol::ftp test for If-Modified-Since was wrong.
+
+ o How the $url->abs method works can now be configured with the global
+ variables $URI::URL::ABS_ALLOW_RELATIVE_SCHEME and
+ $URI::URL::ABS_REMOTE_LEADING_DOTS.
+
+ o The anonymous password guesser for ftp URLs will now call the external
+ `whoami` program any more. Patch by Charles C. Fu <ccwf@bacchus.com>.
+
+ o LWP::Protocol::http now allow dynamic requests without any
+ Content-Length specified when Content-Type is multipart/*
+
+ o HTTP::Request::Common can now upload infinite files.
+ (Controlled by the $DYNAMIC_FILE_UPLOAD configuration variable.)
+
+
+
+Fri Jul 10 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.35
+
+ o More lwp-rget patches from David D. Kilzer <ddkilzer@madison.dseg.ti.com>.
+ Adds the following new options: --iis, --keepext, --tolower
+
+ o LWP::MediaTypes patches from MacEachern <dougm@pobox.com>. Adds new
+ functions: add_type(), add_encoding(), read_media_types()
+
+
+
+Tue Jul 7 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.34
+
+ o LWP::Protocol::ftp now try to use the MDTM command to support
+ the Last-Modified response header as well as
+ If-Modified-Since in requests. Original and final patch by
+ Charles C. Fu <ccwf@bacchus.com>
+
+ o $url->path_components will not escape "." any more.
+
+ o WWW::RobotRules will now work for Mac text files too (lines delimited
+ by CR only). Patch by Olly Betts <olly@muscat.co.uk>
+
+ o lwp-rget support <area ..> links too.
+
+
+
+Thu May 7 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.33
+
+ o LWP::Simple::get() did try to handle too many of the 3xx
+ codes as redirect when it bypasses full LWP.
+
+ o LWP::UserAgent->mirror will now use utime(2) to set the
+ file modification time corresponding to the Last-Modified
+ header.
+
+ o LWP::Protocol::http will not modify the HTTP::Request that
+ it is processing. This avoids sticky Host header for
+ redirects.
+
+ o URI::Heuristic and lwp-download documentation update.
+
+
+
+Wed Apr 15 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.32
+
+ o Much improved HTTP::Daemon class. We now support persistent
+ connections. Changes include:
+ - $c->get_request can be told to return after reading and
+ parsing headers only.
+ - $c->reason (new method)
+ - $c->read_buffer (new method)
+ - $c->proto_ge (new method)
+ - $c->force_last_request (new method)
+ - $c->send_response now support CODE reference content
+ and will use chunked transfer encoding for HTTP/1.1 clients.
+ - expanded the documentation.
+
+
+
+Fri Apr 10 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.31
+
+ o Makefile.PL now checks that HTML::HeadParser is present.
+
+ o Updated HTTP::Cookies according to draft-ietf-http-state-man-mec-08.txt
+ It now supports the .local domain and value less 'port' attribute in
+ the Set-Cookie2 header.
+
+ o HTTP::Headers update:
+ - $h->content_type now always return a defined value
+ - $h->header($field) will now concatenate multi-valued header
+ fields with "," as separator in scalar context.
+
+ o HTTP::Request::Common update:
+ - used to destroy the content of the hash/array arguments
+ passed to its constructor functions.
+ - allow a hash reference to specify form-data content.
+ - you can override Content-Disposition for form-data now.
+ - set content-encoding for files if applicable
+ - default boundary string is now always "--000"
+
+ o LWP::UserAgent will not follow more than 13 redirects
+ automatically.
+
+
+
+Wed Apr 1 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.30
+
+ o Unbundled the following modules:
+
+ * HTML-Parser (HTML::Parser, HTML::Entites, HTML::LinkExtor,...)
+ * HTML-Tree (HTML::Element, HTML::TreeBuilder,...)
+ * Font-AFM (Font::AFM, Font::Metrics::*)
+ * File-CounterFile
+
+ o Simplified internal structure of HTTP::Headers. Hopefully,
+ nobody will notice.
+
+ o New modules HTTP::Headers::Auth, HTTP::Headers::ETag that adds
+ additional convenience methods to the HTTP::Headers class.
+
+ o Removed split_etag_list() from HTTP::Headers::Util, in the hope
+ that nobody had starting using it.
+
+
+
+Tue Mar 24 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.22
+
+ o HTTP::Cookies made more compatible with Netscape cookies. Allow
+ the domain to match host, allow dots in the part of the hostname
+ not covered by domain. Don't quote the cookie value even when it
+ contains non-token chars. Based on patch from Kartik Subbarao
+ <subbarao@computer.org>.
+
+ o Updated HTTP::Status to reflect <draft-ietf-http-v11-spec-rev-03>.
+ RC_MOVED_TEMPORARILY renamed to RC_FOUND. Added codes
+ RC_TEMPORARY_REDIRECT (307) and RC_EXPECTATION_FAILED (417).
+ Slightly more documentation too.
+
+ o The split_header_words() function HTTP::Headers::Util could go
+ into infinite loop on some header values. Implemented split_etag_list()
+ too. Added more documentation and test script for this module.
+
+ o LWP::Simple now switch to full LWP implementation even for systems
+ that force all environment keys to be upper case. Modification
+ suggested by Dale Couch <dcouch@training.orl.lmco.com>.
+
+ o LWP::UserAgent allows redirects to a relative URL with scheme to be
+ made. Suggested by Chris W. Unger <cunger@cas.org>.
+
+ o Applied dtd2pm.pl patches from <peterm@zeta.org.au>. It can now
+ extract information from the HTML40.dtd
+
+
+
+Thu Mar 12 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.21
+
+ o lwp-rget patches from David D. Kilzer <ddkilzer@madison.dseg.ti.com>
+ (modified by Gisle). Support the --hier and the --auth options
+ and <frame>s.
+
+ o File::CounterFile protect against bad $/ and $\ as suggested
+ by Frank Hoehne.
+
+ o File::Listing used "next" when return was more appropriate.
+ Patch by erik@mediator.uni-c.dk.
+
+ o HTML::Element support for multiple boolean attributes for a single
+ element. Patch by Philip Guenther.
+
+ o Can set $HTTP::Headers::TRANSLATE_UNDERSCORE to FALSE value to
+ suppress tr/_/-/ of header keys.
+
+ o LWP::Protocol::http will not initialize the Host header if it is
+ already set.
+
+ o LWP::Protocol::http did not handle responses with no header lines
+ correctly. Patch by Daniel Buenzli <buenzli@rzu.unizh.ch>
+
+ o $url->rel() handles path segments without leading "/" better.
+
+
+
+Fri Feb 13 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.20
+
+ o Fixed the "500 Offset outside string" bug that affected perl
+ 5.004_03 and older version of Perl.
+
+ o Fixed a documentation typo spotted by Michael Quaranta
+ <quaranta@vnet.IBM.COM>
+
+ o HTTP::Date: Protect against croaking from timegm/timelocal.
+
+
+
+Mon Jan 26 1998 Gisle Aas <aas@sn.no>
+
+ o Release 5.19
+
+ o HTML::Parser does not call $self->text() callback for empty text
+ any more.
+
+ o LWP::Protocol::https was noisy when connections failed and the
+ script was running with '-w' (noise inherited from IO::Socket::INET)
+
+ o $ua->use_alarm(BOOL) now gives a warning if running with -w
+
+
+
+Tue Jan 20 1998 Gisle Aas <aas@sn.no>
+
+ o Developer release 5.18_05
+
+ o HTTPS support based on my Crypt-SSLeay module. The Net-SSLeay module
+ is not supported any more.
+
+ o lwp-request documentation typo spotted Martijn Koster.
+
+ o Removed superfluous \\ in the URI::Escape regex. This was also
+ spotted by Martijn.
+
+ o File::Listing now handles timezones correctly.
+
+ o Added $VERSION to modules that was missing it.
+
+ o Added 'use strict' to several modules that was missing it.
+
+ o LWP::Protocol::http now adds the Client-Peer header to responses and
+ has hooks for more callbacks.
+
+ o LWP::Protocol::https adds Client-SSL-Cipher, Client-SSL-Cert-Subject
+ and Client-SSL-Cert-Issuer headers to the response. The requests can
+ also be made conditional based on the peer certificate using the
+ If-SSL-Cert-Subject header in requests.
+
+ o HTML::Parse is back. (It was even used in lwpcook.pod)
+
+
+
+Wed Dec 17 1997 Gisle Aas <aas@sn.no>
+
+ o Developer release 5.18_04
+
+ o Makefile.PL fix based on report from Pon Hwa Lin <koala@fragment.com>
+
+ o lwp-request will now print the response code message with -s and -S
+ options.
+
+ o Hide IO::Socket::INET noise when running under -w
+
+ o Don't set 'Content-Length: 0' in HTTP requests.
+
+ o LWP::Protocol::http now calls LWP::Debug::conns again
+
+
+
+Tue Dec 16 1997 Gisle Aas <aas@sn.no>
+
+ o Developer release 5.18_03
+
+ o Got rid of alarms() and replaced LWP::Socket with IO::Socket::INET.
+ New protocol implementations for http, https, gopher, nntp.
+ $ua->use_alarm() is now a noop.
+
+ o LWP::Protocol::ftp patch from Tony Finch <fanf@demon.net>.
+
+ o Removed deprecated modules from the distribution; HTML::Parse,
+ LWP::Socket, LWP::SecureSocket, LWP::IO, LWP::TkIO.
+
+
+
+Fri Dec 12 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.18
+
+ o HTTP authorization patches from Tony Finch <fanf@demon.net>.
+ Allows "user:pass@" in HTTP URLs.
+
+ o HTML::Parser patch by Brian McCauley <B.A.McCauley@bham.ac.uk>.
+ Pass original text to end() method.
+
+ o The HTML::Parser->netscape_buggy_comment method is deprecated.
+ Use HTML::Parser->strict_comment instead. The default value
+ has changed with the name.
+
+ o Some HTML::Parser optimization tweaks.
+
+ o New module named HTML::Filter
+
+ o Updated HTTP::Headers to the latest HTTP spec. Added knowledge
+ about the "Trailer", "Expect", "TE", "Accept-Range" headers.
+ "Public" header is gone.
+
+ o Added some more header convenience methods: if_unmodified_since,
+ content_language, and proxy_authorization methods.
+
+ o HTTP::{Request,Response}->clone can handle subclasses now.
+
+ o HTTP::Request->url() can now undefine the URL.
+
+ o HTTP::{Request,Response}->as_string format looks more like
+ the HTTP protocol formatting now. Dashed lines above and
+ below is gone.
+
+ o Documented HTTP::Response->status_line method
+
+ o Compressed HTML::Response->error_as_HTML output
+
+ o HTTP::Status updated to latest HTTP spec. Added
+ RC_REQUEST_RANGE_NOT_SATISFIABLE (416)
+
+
+
+Tue Dec 2 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.17
+
+ o All authentication handling moved out of LWP::UserAgent and into
+ LWP::Authen::Basic and LWP::Authen::Digest. We now also support
+ PROXY_AUTHENTICATION_REQUIRED responses.
+
+ o HTML::Formatter will always add a blank line for <br>.
+
+ o Avoid use of uninitialized value in HTTP::Daemon.
+
+ o HTTP::Date allows seconds when recognizing 'ls -l' dates. This
+ allows us to parse syslog time stamps.
+
+ o HTTP::Request::Common::POST allows a hash reference as second
+ argument (in addition to an array reference).
+
+ o LWP::Simple will initialize the $ua if it is exported.
+
+ o Various documentation updates.
+
+
+
+Fri Nov 21 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.16
+
+ o LWP::Simple::head() would die in array context because str2time
+ was not imported any more.
+
+ o HTTP::Daemon->accept now takes an optional package argument like
+ IO::Socket->accept does.
+
+ o Made HTTP::Request and HTTP::Response subclassable.
+
+ o Added Proxy-Authorization example to lwpcook.
+
+
+
+Thu Nov 6 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.15
+
+ o New module URI::Heuristic
+
+ o The lwp-request script now use URI::Heuristic for it's URL arguments.
+ It means that 'lwp-request perl' will not get a file called "./perl"
+ but will fetch the page "http://www.perl.com" or something similar.
+ If you want to get the file you have to prefix it with "./". Full
+ URLs are never affected by this.
+
+ o LWP::Simple::get() will bypass LWP for simple HTTP requests. This
+ should make it somewhat faster.
+
+ o LWP::RobotUA has a new method called $ua->use_sleep() that
+ controls how niceness towards the servers are enforced.
+ Previously $ua->use_alarm() used to control this, but this did
+ not work well on Win32 systems.
+
+ o URI::URL::rel() will handle URLs to a fragment within the same
+ document better. Initial patch from Nicolai Langfeldt
+ <janl@math.uio.no>.
+
+ o HTML::Element don't consider </th>, </tr> and </td> optional any
+ more. I wonder how Netscape managed to not implement this
+ correctly all this time.
+
+ o Added lots of modern tags to HTML::AsSubs.
+
+ o HTTP::Request::Common will read uploaded files in binmode().
+ This should be better for Win32 systems. Contributed by
+ <Steve_Kilbane@cegelecproj.co.uk>.
+
+
+
+Sun Oct 12 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.14
+
+ o HTML::Formatter patches from Andreas Gustafsson <gson@araneus.fi>.
+ The formatters handling of whitespace is much better now. Thanks!
+
+ o HTML::FormatText: can specify margins in the constructor.
+
+ o URI::URL: the base will be absolutized internally now.
+
+ o URI::URL will take advantage of void context provided by perl5.004.
+ This means that using $u->path and $u->query should be safer now.
+
+ o URI::URL->print_on defaults to STDERR now (used to be STDOUT).
+
+ o URI::URL: removed version 3 compatibility stuff ($COMPAT_VER_3).
+
+ o $ua->mirror should work better on dosish systems (can not
+ rename when target file exists).
+
+ o Typo in lwp-download prevented it from compiling.
+
+ o Some minor documentations typos corrected.
+
+
+
+Sat Sep 20 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.13
+
+ o Brand new module called HTTP::Cookies. It stores cookies
+ (Set-Cookie and Set-Cookie2 headers) from responses and can
+ create appropriate Cookie headers for requests. It can also
+ share cookie files with Netscape.
+
+ o LWP::UserAgent now support the cookie_jar() attribute. When
+ set to an HTTP::Cookies object, it will automatically manage
+ the cookies sent to the servers. Off by default.
+
+ o New header utility functions in HTTP::Headers::Util.
+
+ o Win32 and OS/2 patches for the lwp-xxx utilities. Turn on
+ binary mode by default (option to turn it off), avoid modifying $0,
+ and don't be confused about suffixes in the script names.
+ Contributed by Ben Coleman <bcoleman@mindspring.com>
+
+ o OpenVMS patch for Font:AFM by Brad Hughes <brad@tmc.naecker.com>
+
+
+
+Fri Sep 5 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.12
+
+ o decode_entities() would sometimes introduce ";" after
+ things that looked like they were entities.
+
+ o HTML::LinkExtor knows about <applet code="...">
+
+ o Patch from Gary Shea <shea@gtsdesign.com> that makes the
+ tests work even if perl is not called "perl"
+
+ o HTTP::Date handles 12:00PM correctly now. Patch from
+ William York <william@mathworks.com>
+
+ o HTTP::Request::Common don't quote the boundary string for
+ multipart/form-data messages any more.
+
+ o Font::AFM works for encodings where .notdef is defined to
+ have some size. Documentation and efficiency update.
+
+
+
+Wed Aug 6 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.11
+
+ o Perl version 5.004 is now required for libwww-perl.
+
+ o Win32 patches from Gurusamy Sarathy <gsar@engin.umich.edu>.
+ Now passes all tests on that platform.
+
+ o HTTPS support contributed by Josh Kronengold <mneme@mcny.com>
+
+ o Support hex entities ÿ HTML::Entities::(en|de)code only
+ modify their argument in void context.
+
+ o Fixed formatter bug with <font> tags which did not specify size.
+
+ o Better HTML::HeadParser documentation
+
+ o Fixed HTML::LinkExtor documentation typo spotted by Martijn.
+
+ o HTTP::Request::Common now use CRLF for multipart/form-data
+
+
+
+Fri Jun 20 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.10
+
+ o Make '+' a reserved URL character. Decode unescaped '+' as
+ space in $url->query_form().
+
+ o Use $Config{d_alarm} to determine default for $ua->use_alarm()
+
+
+
+Tue Jun 10 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.09
+
+ o Removed the MIME modules from the distribution. They are distributed
+ separately now.
+
+ o Added a new module called HTTP::Request::Common
+
+ o Improved HTTP::Status documentation. It is now also possible
+ to import the is_client_error/is_server_error functions.
+
+ o LWP::MediaTypes::guess_media_type() can now take an optional
+ HTTP::Header parameter.
+
+ o LWP::Protocol ensures that scheme is legal as module name.
+
+ o LWP::Protocol::http is not as strict when trying to verify the
+ method name. It now also accepts responses without a message
+ on the status line.
+
+ o WWW::RobotRules::AnyDBM_File: Some DBMs fail to allow multiple
+ opens of the same file. Patch from Mark James <jamesm@skate.org>
+
+ o Created Bundle::LWP
+
+
+
+Sat Apr 5 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.08
+
+ o Made libwww-perl warning compatible with upcoming perl5.004beta2
+ (aka 5.003_98)
+
+ o encode_base64() did not work properly if pos() of the string to
+ encode was different from 0.
+
+ o HTML::Parser was confused about "</" when it did not start an end tag.
+
+ o HTML::FormatPS will provide ISOLatin1Encoding in its output.
+
+ o Calling HTML::LinkExtor->links will clear out old links.
+
+ o url()->rel($base) would ignore the $base argument.
+
+ o Don't croak() when setting url()->path().
+
+
+
+Tue Feb 11 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.07
+
+ o Can limit the size of the response content with $ua->max_size()
+
+ o Added time2iso() functions to HTTP::Date.
+
+ o Made LWP::Protocol::http more portable to the MacPerl. /./ match
+ different things on MacPerl.
+
+
+
+Mon Jan 27 1997 Gisle Aas <aas@sn.no>
+
+ o Release 5.06
+
+ o URI::URL is now compatible with perl5.004 overloading.
+
+ o HTML::HeadParser makes X-Meta-Name headers for <meta> elements
+ that does not specify an 'http-equiv' attribute.
+
+ o URI::URL::ftp does not die if Net::Domain is not installed and
+ you ask for an anonymous username or password.
+
+ o WWW::RobotRules: The robots.txt parser did not ignore comment lines
+ as it should.
+
+ o LWP::Protocol::http is more forgiving towards servers that return
+ bad responses.
+
+ o Allow "?" before search string in gopher URLs.
+
+ o LWP::Protocol::file did not escape funny filenames when generating
+ HTML directory listings.
+
+ o LWP::Protocol::ftp now gets the Content-Encoding correct. 'CODE'
+ content in PUT requests also work now.
+
+ o Relative locations in redirects did not work with URI::URL::strict.
+
+ o OS/2 portability patches from Ilya Zakharevich
+
+ o LWP::Authen::* patch from Doug MacEachern
+
+ o Support experimental data:- URLs
+
+ o Some tests (those using HTTP::Daemon) now die more gracefully if
+ IO::* modules is not installed.
+
+
+
+Wed Dec 4 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5.05
+
+ o LWP::UserAgent::simple_request: local($SIG{__DIE__}) protects us
+ against user defined die handlers.
+
+ o Use Net::Domain (instead of Sys::Hostname) to determine FQDN. It
+ is used by URI::URL when it determines anonymous ftp login address.
+
+ o lwp-download: New program in the bin directory
+
+ o HTML::Parser: Allow '_' in attribute names. This makes it possible
+ to parse Netscape's bookmarks.html file.
+
+ o HTTP::Daemon: Fixed chunked transfer encoding and multipart content
+ in get_request(). Support HTTP/0.9 clients.
+
+ o Don't clobber regex variables when HTTP::Message delegates methods
+ to the header.
+
+ o Base64::decode_base64 now checks that the length input string to
+ decode is a multiple of 4.
+
+ o t/robot/rules-dbm.t clean up better and will use AnyDBM for dumping
+
+ o File::CounterFile: $/ strikes again by Andreas König
+
+ o File::Listing updates from William York <william@mathworks.com>. We
+ can now parse the MS-Windows ftp server listings.
+
+ o HTTP::Date now supports the MS-Windows 'dir' date format. Patch by
+ William York.
+
+ o LWP::MediaTypes::media_suffix will return first type in scalar context.
+
+
+
+Tue Oct 22 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5.04
+
+ o Added HTTP::Daemon. This is a HTTP/1.1 server class. This means
+ that libwww-perl no longer is a client library only. The HTTP::Daemon
+ is also used in the new test suite.
+
+ o HTTP::Message support the protocol() method. Used by HTTP::Daemon.
+
+ o HTTP::Response can be constructed with a header and content as
+ argument.
+
+ o Typo corrections in the documentation.
+
+ o File::Listing::parse_dir accepts "GMT" as timezone now.
+
+ o HTML::Parser will call the start() method with two new parameters;
+ $attrseq, $origtext.
+
+ o Integrated HTML::FormatPS patches from
+ Jim Stern <jstern@world.northgrum.com>
+
+ o Class modules don't inherit from AutoLoader any more. They just
+ import the AUTOLOAD method.
+
+ o LWP::Protocol will untaints scheme before loading protocol module.
+
+ o Digest does not send "opaque" if it was not present in the request.
+ The "Extension" header is not returned any more.
+
+ o New method: $url->crack that will return a list of the various
+ elements in a URI::URL.
+
+ o WWW::RobotRules did not use the agent() method when determining
+ who we are. This affected WWW::RobotRules::AnyDBM_File parsing
+ for robots.txt. Visit count did not increment for
+ WWW::RobotRules::InCore.
+
+
+
+Tue Oct 1 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5.03
+
+ o Hakan Ardo's persistent robot rules is now part of the standard
+ distribution. This is still experimental and might change in the
+ future. It includes the new WWW::RobotRules::AnyDBM_File class
+ and updates to LWP::RobotUA.
+
+ o HTML::Parser now supports buggy Netscape comment parsing. Enable
+ it by calling $p->netscape_buggy_comment(1). The previous version
+ of the parser could also (under very unlucky and unlikely
+ circumstances) call the $self->comment() method several times for
+ the same comment text.
+
+ o HTML::Parser: Use new $p->eof to signal end of document instead of
+ $p->parse(undef).
+
+ o HTML::Element::starttag() is now smarter about which quotes it
+ use around attribute values.
+
+ o New HTTP::Response methods: current_age(), freshness_lifetime(),
+ is_fresh(), fresh_until().
+
+ o HTTP::Message: New method ($mess->content_ref) which will return
+ a reference to the current content.
+
+ o URI::URL: New method ($url->rel) which does the opposite of abs().
+ Example: url("http://host/a/b/c", "http://host/c/d/e")->rel would
+ return url("../../a/b/c", "http://host/c/d/e"). This was
+ suggested by Nicolai Langfeldt <janl@ifi.uio.no>
+
+ o URI::URL: $url->query_form can now take array references as value
+ specification. For instance: $url->query_form(foo => ['bar', 'baz']
+
+ o Avoid '"my" variable $var masks earlier declaration in same scope'
+ warnings in perl5.003_05.
+
+
+
+Wed Sep 11 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5.02
+
+ o lwp-rget: Initialize proxy settings from environment
+
+ o HTML::Entities::encode_entities: Don't encode $ and %
+
+ o HTML::LinkExtor::links: Now works when no links were found.
+
+ o HTTP::Headers::as_string: Allow \n in header value
+
+
+
+Tue Aug 1 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5.01.
+
+ o Updated ftp protocol module to be compatible with Net::FTP
+ version 2.00 (the version found in libnet-1.00)
+
+ o New HTML parser module called HTML::LinkExtor
+
+ o Various documentation typo corrections. Most of these contributed
+ by Bob Dalgleish.
+
+ o HTML::HeadParser updates 'Content-Base' instead of 'Base'. It also
+ updates the 'Link' header based on <link ...>
+
+ o HTTP::Headers and HTTP::Status updated according to
+ draft-ietf-http-v11-spec-06
+
+ o HTTP::Headers can now use "_" as alternative to "-" in field names.
+
+ o HTTP::Response::base now looks for 'Content-Base',
+ 'Content-Location' and 'Base' headers.
+
+ o Avoid warning in LWP::MediaTypes if $ENV{HOME} is not defined.
+
+ o The new $ua->parse_head() method can be used to turn off
+ automatic initialization of response headers from the <HEAD>
+ section of HTML documents.
+
+ o Added eq() method for URI::URL objects
+
+ o The HTML::Formatter recovers even if a handle method is not defined
+ for all tags found during traversal
+
+
+
+Sun May 26 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5.00.
+
+ o LWP::Simple::head() now return something useful in scalar context.
+
+ o Rewritten the HTML::Parse stuff. Introduced the HTML::Parser class
+ that will tokenize a HTML document. The rest of the old
+ HTML::Parse functionality has moved to HTML::TreeBuilder class.
+ Note, that the HTML stuff is still alpha.
+
+ o Implemented HTML::HeadParser. This is a lightweight parser for
+ the <HEAD> section of a HTML document.
+
+ o HTML::Element had problem with presenting things like <foo
+ bar="bar">.
+
+ o HTML::Entities: Included additional ISO-8859/1 entities listed in
+ RFC1866.
+
+ o HTML::AsSubs exported 'header' instead of 'head'
+
+ o We know about a few more of the HTML 3.2 element.
+
+ o HTTP::Date had problems with years before 1970, because Time::Local
+ went into an infinite loop. Check for this.
+
+ o Added $header->title method.
+
+ o Made $header->authorization_basic return "uname:passwd" in scalar
+ context
+
+ o LWP::Protocol::collect() will let the HTML::HeadParser look at the
+ document content as it arrives. This will initialize headers from
+ elements like <base href="...">, <title>...</title> and <meta
+ http-equiv="..." ...>.
+
+ o Simplified $response->base implementation, because we don't have
+ to look into the content any more.
+
+ o Added -quiet option to lwp-rget
+
+ o Typo fixes and some documentation additions.
+
+
+
+Thu May 9 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5b13
+
+ o Made URI::URL::strict(0) the default. I got tired of all this
+ eval { } stuff just to recover. The URI::URL::strict'ness also
+ determine if calling some standard method that happens to be
+ illegal for some protocol scheme will croak or just be ignored.
+
+ o Ensure correct $INPUT_RECORD_SEPARATOR and $OUTPUT_RECORD_SEPARATOR
+ at places where we <> or print.
+
+ o Always localize $_ before any 'while(<FILE>) {}'-loops
+
+ o Implemented $protocol->collect_once() and simplified several
+ of the protocol implementations by using it.
+
+ o The HTML parser used to get it wrong if you were unlucky about the
+ breakup of the text. An example of broken behaviour was this:
+
+ $html = parse_html "<!-- A comment -";
+ $html = parse_html "-> and some text.";
+
+ o The HTML parser does not ignore whitespace-only text any more.
+
+ o HTML::Parse warnings are now optional and turned off by default.
+
+ o New start for $html->as_HTML().
+
+ o Fixed some typos
+
+
+
+Wed Apr 24 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5b12
+
+ o New utility program called 'lwp-rget'.
+
+ o $response->base was broken for HTML documents
+
+ o New fancy LWP::Debug import() method. Can now turn on debugging with
+ "use LWP::Debug '+';"
+
+ o Trap exceptions (die) from the response callback routine
+
+ o The RobotUA now requires an e-mail address of the person responsible
+ for the robot.
+
+ o New $ua->from() method.
+
+ o Support for gopher Index-Search (gopher type '7' requests).
+ Contributed by Harry Bochner <bochner@das.harvard.edu>
+
+ o Cleaned up white-space usage in the source.
+
+
+
+Wed Apr 3 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5b11
+
+ o Implemented a NNTP protocol module. The library can now fetch and
+ post news articles.
+
+ o More documentation
+
+ o Don't look at the URI header for redirects
+
+ o New $res->base() method for HTTP::Responses
+
+ o Graham Barr's patch to File::Listing to make it more OO internally
+
+ o content_type() return value is canonicalized
+
+ o $ua->request() does not die on bad URLs any more
+
+ o LWP::MediaTypes merge all media.types files that if finds
+
+ o FTP request with content to file or callback did not work
+
+ o The value of HTTP Host: header is now $url->netloc;
+
+ o The URI::URL constructor now accept URLs wrapped up in "<>"
+
+ o $url->abs() now has a second optional argument that makes it accept
+ that relative URLs can have scheme, i.e. treat "http:img.gif" as a
+ relative URL.
+
+ o Added prototypes to the HTTP::Status::is_xxx() functions
+
+ o Added prototypes to the MIME:: encoding/decoding functions
+
+ o Always return scheme for mailto and news URLs (as_string)
+
+ o RobotRules patches from Henry A Rowley.
+
+ o More tests
+
+ o <SMALL> and <BIG> again
+
+
+
+Thu Mar 14 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5b10
+
+ o GET ftp://host/doc was never successful for normal files.
+
+ o LWP::Socket: read_until() did not notice EOF condition. I.e. if
+ a HTTP server closed the connection before any output was generated,
+ the we continued to read 0 bytes in a busy loop until the alarm()
+ killed us.
+
+ o Added support for Digest Access Authentication. Contributed by
+ Doug MacEachern <dougm@osf.org>.
+
+ o Makefile.PL: check for MD5 library
+
+ o No longer print message content in HTTP::Response::error_as_HTML()
+
+ o Access to "file:/path" gave warning when the environment variable
+ no_proxy was set.
+
+ o The http-post test sends a Content-Type header. Some servers hang
+ if this header is missing.
+
+ o HTML::Parse:
+ - allow <SMALL> and <BIG> tags
+ - allow empty attribute values
+
+
+
+Tue Mar 5 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5b9
+
+ o Started to write on the libwww-perl cookbook (lwpcook.pod)
+
+ o The URI::URL module now exports the function url(). This is an
+ alternative (easy to use) constructor function.
+
+ o Expanding relative file URLs starting with "#" did not work.
+
+ o Fixed autoloaded DESTROY problem by adding empty DESTROY routine
+ to URI::URL.
+
+ o Does not try generate password for ftp-URLs unless the username is
+ "anonymous" or "ftp"
+
+ o The LWP::Simple user agent proxy settings are initialized from
+ the proxy environment variables.
+
+ o LWP::Protocol::ftp: Use the Net::FTP library to access ftp servers.
+ Convert directories to HTML on request (Accept: text/html).
+
+ o New module HTTP::Negotiate
+
+ o New module File::Listing
+
+ o HTTP::Date::str2time can parse a few more formats, like the 'ls -l'
+ format and ISO 8601. The function now also takes an optional second
+ parameter which specify a default time zone.
+
+ o Added prototypes to the HTTP::Date functions.
+
+ o The library adds a timestamp to responses ("Client-Date")
+
+ o HTTP::Status: Updated to proposed HTTP/1.1
+
+ o HTTP::Headers: Updated to proposed HTTP/1.1
+
+ o LWP::Protocol::http: Updated to HTTP/1.1 methods
+
+ o Took out stringify overloading in HTML::Element.
+
+
+
+Mon Feb 26 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5b8
+
+ o Renamed functions using thisStyleOfNames to this_style_of_names.
+ Provided a script called 'update_from_5b7'
+
+ o Renamed the 'request' and 'mirror' scripts to 'lwp-request' and
+ 'lwp-mirror'. The GET, POST, HEAD aliases for 'lwp-request' are
+ the same.
+
+ o Implemented LWP::RobotUA
+
+ o Class name for RobotRules did not match the file name
+
+ o URI::URL
+ - perl5.002gamma is required (because use vars).
+ - The leading slash in now part of the path if it is present.
+ - More documentation
+ - Use AutoLoader to speed things up.
+ - New class URI::URL::_login and made telnet, rlogin, tn3270
+ subclasses from this one.
+ - URI::URL::wais is better supported.
+ - URI::URL::news is better supported.
+ - New URI::URL::mailto methods: user/host
+
+ o HTTP::Date::time2str now works correctly with '0' as argument
+
+ o HTTP::Message delegates unknown methods to the headers.
+
+ o HTTP::Request::uri is an alias for HTTP::Request::url. Can set
+ the URL to undef.
+
+ o Added convenience methods to HTTP::Headers for easy access to
+ frequently used headers.
+
+ o Simplified LWP::Debug
+
+ o Use standard AutoLoader for LWP::IO functions.
+
+ o Played with the profiler (Devel::DProf) and made some things
+ go quicker.
+
+ o Included the File::CounterFile module. Excluded Mail::Cap module
+ as it is also part of the MailTools package.
+
+
+
+Mon Feb 5 1996 Gisle Aas <aas@sn.no>
+
+ o Release 5b7
+
+ o Perl5.002 is required now
+
+ o Rewrite of the URI::URL module (version 4.00)
+ - escaping/unsafe stuff redone (and removed)
+ - URI::URL::_generic moved out of URL.pm
+ - netloc, path, params, query is now stored internally in escaped form
+ - new methods for URI::URL::_generic are:
+ epath
+ eparams
+ equery
+ path_components
+ absolute_path
+ - new methods for URI::URL::http are:
+ keywords
+ query_form
+ - new methods for URI::URL::file are:
+ newlocal
+ local_path
+ unix_path
+ dos_path
+ mac_path
+ vms_path
+
+ o URI::Escape now semi-compile regular expressions (by evaling an
+ anonymous sub). Same technique is also used in HTML::Entities.
+
+ o HTTP::Date parser rewritten using regular expressions.
+
+ o HTTP::Headers->as_string() now croaks if any field values
+ contain newline.
+
+ o HTTP::Status constants use empty prototype.
+
+ o Font metrics moved to a new subdirectory (lib/Font/Metrics)
+
+ o Don't use the VERSION script any more (even if it was very clever)
+
+ o HTML::Entities will now export the names decode_entities() and
+ encode_entities().
+
+ o Andreas Koenig's black patch to HTML::Element.
+
+ o The HTML::Formatter now knows about <menu> and <dir> tags
+
+ o The construct "defined ref($arg)" did not work on perl5.002
+ because ref now always return something defined.
+
+ o LWP::UserAgent sends by default an 'User-Agent' header.
+
+ o LWP::Simple sends 'User-Agent' header to servers.
+
+ o Updated the LWP::Socket module to use the new Socket.pm interface.
+
+ o LWP::Protocol::http sends the new HTTP/1.1 'Host' header.
+
+ o LWP::Protocol::file use $url->local_path to get a file to open.
+ It also inserts a <BASE> tag in directories instead of a redirect.
+
+ o MIME::Base64 routines can be called as MIME::Base64::encode() and
+ MIME::Base64::decode(). Same kind of thing for QuotedPrint.
+
+
+
+Mon Nov 6 1995 Gisle Aas <aas@oslonett.no>
+
+ o Release 5b6
+
+ o Installation should work better for those that still runs
+ perl4 as 'perl'. The mirror script is not installed by
+ default.
+
+ o LWP::Socket::_getaddress() Numerical IP addresses did not work.
+
+ o LWP::Socket::pushback() did not work. This also avoids the bad
+ pp_select() core dump from perl.
+
+ o LWP::IO now also selects on socket exceptions.
+
+ o HTML::Parse: Ignore <!DOCTYPE ...> regardless for case. Some
+ bad insertElement() calls made infinite loops.
+
+ o The uri.t test works better for places where /tmp is a sym-link.
+
+
+
+Sat Sep 16 1995 Gisle Aas <aas@oslonett.no>
+
+ o Release 5b5
+
+ o newlocal URI::URL does not put "//localhost" into the URLs any
+ longer.
+
+ o The request program: New -o option to reformat the HTML code
+ New -C option to provide credentials on the command line.
+ The -b option never worked.
+
+ o LWP::Protocol::file now returns redirect for access to directories
+ where the trailing slash is missing.
+
+
+
+Thu Sep 14 1995 Gisle Aas <aas@oslonett.no>
+
+ o Speedups and bug fixes in the HTML parser. The parser now
+ understands some more deprecated tags (like <xmp> and <listing>).
+
+ o HTML::Elements are now stringified using perl overloading.
+ The interface to the traverse() callback has changed.
+
+ o Implemented HTML formatters for plain text and Postscript.
+
+ o Added lib/Font stuff to support the Postscript formatter.
+
+ o Inspired by Tim Bunce, I implemented the HTML::AsSubs module.
+ Don't know if it is really useful.
+
+ o The local/get test does not use /etc/passwd any more.
+
+
+
+Thu Sep 7 1995 Gisle Aas <aas@oslonett.no>
+
+ o Changed package name to libwww-perl-5xx
+
+ o Made LWP::Protocol::ftp actually transfer data
+
+ o Implemented methods for LWP::Socket to act as a server:
+ bind(), listen(), accept(), getsockname(), host(), port()
+
+
+
+Wed Sep 6 1995 Gisle Aas <aas@oslonett.no>
+
+ o Release 0.04
+
+ o Implemented modules to parse HTML.
+
+
+
+Mon Sep 4 1995 Gisle Aas <aas@oslonett.no>
+
+ o Implemented Mail::Cap which will become part of the MailTools
+ package.
+
+ o Moved Base64 to MIME::Base64. Reimplemented MIME::Base64 by using
+ [un]pack("u",...) Implemented LWP::MIME::QuotedPrint for
+ completeness' sake. Routine names has changed as suggested by Tim
+ Bunce.
+
+ o LWP::MediaType reads default types from media.types file.
+ guessMediaType() now also returns encodings. New function mediaSuffix()
+
+ o Pass $url to $ua->getBasicCredentials(). This also fixes security
+ hole with the old implementation of getBasicCredentials().
+
+ o LWP::Protocol::file now sets Content-Encoding headers
+
+ o Allow request content to be provided by a callback routine.
+
+ o Fix bug that prevented response callback to work. The first parameter
+ (data) is no longer a reference, because $_[0] is already a reference.
+ Don't callback unless successful response. Callbacks during redirects
+ was confusing.
+
+ o URI::URL. Remove port from netloc if it is the default port.
+ Don't use anything, just require.
+
+ o HTTP::Message->addContent() does not need a reference parameter.
+
+ o LWP::Socket->open() has been renamed top LWP::Socket->connect()
+ LWP::Socket->close has gone. Implemented new method LWP::Socket->read()
+ that returns as fast as it has some data to offer. Implemented
+ LWP::Socket->pushback().
+
+ o Does not die in LWP::UserAgent->request()
+
+ o LWP::Socket now use LWP::IO for low level IO
+
+ o Implemented LWP::TkIO as a replacement module for LWP::IO when using Tk.
+
+
+
+Thu Aug 17 1995 Gisle Aas <aas@oslonett.no>
+
+ o $ua->redirectOK() for checking redirects
+
+ o reorganized tests in the "t" directory.
+
+
+
+Fri Aug 11 1995 Gisle Aas <aas@oslonett.no>
+
+ o Release 0.03
+
+ o Included RobotRules.pm from Martijn Koster
+
+
+
+Thu Aug 10 1995 Gisle Aas <aas@oslonett.no>
+
+ o New module URI::Escape (URI::URL use this module for default
+ escaping) that provide the uri_escape() and uri_unescape()
+ functions.
+
+ o Setting $url->scheme now changes the class of the object.
+
+ o Made $httpurl->user() and $httpurl->password() illegal.
+ Likewise for other URL schemes.
+
+
+
+
+Wed Aug 9 1995 Gisle Aas <aas@oslonett.no>
+
+ o Reorganisation as discussed on <libwww-perl@ics.uci.edu>
+ LWP::Date --> HTTP::Date
+ LWP::MIMEheader --> HTTP::Headers
+ LWP::StatusCode --> HTTP::Status
+ LWP::Message --> HTTP::Message
+ LWP::Request --> HTTP::Request
+ LWP::Response --> HTTP::Response
+ LWP::MIMEtypes --> LWP::MediaTypes
+
+ o HTTP::Date parses ctime format with missing timezone as suggested
+ by Roy Fielding <fielding@beach.w3.org>
+
+ o HTTP::Status and LWP::MediaTypes exports their functions by default.
+
+ o Splitted up the URI::URL module. Schemes are implemented by separate
+ files that are autoloaded when used. Self test moved to "t/uri.t".
+
+
+
+Mon Aug 7 1995 Gisle Aas <aas@oslonett.no>
+
+ o Applied patch from Marc Hedlund <hedlund@best.com>
+ - Update the @header_order according to the August 3rd draft.
+ - Added Response Header fields: 'Location', 'Public', 'Retry-After',
+ 'Server', and 'WWW-Authenticate'.
+ - Moved 'unknown header' handling from &scan to &header. The old
+ implementation was forcing all unknown header-words to begin with
+ an uppercase (as it should be), but disallowed other uppercase
+ letters.
+ - updates the status code messages under the August
+ 3rd HTTP/1.0 draft. '203' became 'Non-Authoritative Information',
+ '303' became 'See Other', and a new code,
+ '411 Authorization Refused', was added.
+
+ o Can remove multiple headers with single removeHeader() call in MIMEheader.
+
+ o Can assign multiple field/value pairs in header() method of MIMEheader.
+
+ o A field with multiple values is printed as separate values in
+ MIMEheader::as_string().
+
+ o LWP::Response contain new attributes: previous() and request(). These
+ attributes are updated by the UserAgent.
+
+ o Appended \n to some die statements in Socket so that line numbers are
+ suppressed in error messages.
+
+ o Made UserAgent::clone work for reference members
+
+ o Check for redirect loops and multiple authorization failures by
+ examination of the response chain.
+
+ o Use "\015\012" instead of "\r\n" in protocol modules. Some systems
+ define \r and \n differently.
+
+ o request program can now handle documents that needs authorization by
+ prompting the user for username/password. Added new -S option to print
+ request/response chain.
+
+
+
+Tue Jul 25 1995 Gisle Aas <aas@oslonett.no>
+
+ o Release 0.02
+
+ o Included URI::URL in the release
+
+
+
+Mon Jul 24 1995 Gisle Aas <aas@oslonett.no>
+
+ o Incorporated Makemake.PL and VERSION from Andreas Koenig <koenig@mind.de>
+ As a result of this the following things have changed:
+ - programs in "bin" are extracted from .PL-files
+ - reintroduced "lib"
+ - "test" has been renamed as "t"
+ - test programs in "t" has been made Test::Harness compatible
+ - we now have a MANIFEST file
+ - no more need fro update_version, make-dist, lwp-install
+
+ o Renamed bin/get to bin/request. Links to it are now all upper case.
+
+ o Proxy support in bin/request (provided by Martijn Koster)
+
+ o UserAgent can now load proxy settings from environment
+
+ o LWP::Protocol::ftp is under way but not really finished
+
+
+
+Tue Jul 18 1995 Gisle Aas <aas@oslonett.no>
+
+ o Implemented LWP::Protocol::gopher
+
+ o Implemented LWP::Protocol::mailto
+
+ o Fixed proxy typo
+
+
+
+Mon Jul 17 1995 Gisle Aas <aas@oslonett.no>
+
+ o Made release 0.01
+
+
+
+Mon Jul 17 1995 Gisle Aas <aas@oslonett.no>
+
+ o Don't loose first line of HTTP/0.9 requests
+
+ o LWP::Socket use syswrite() for writing
+
+ o Added RC_* documentation to LWP::StatusCode
+
+ o LWP::Date now use hash to look up month numbers
+
+ o Added -f option to "get"
+
+ o Untabify
+
+ o Added a "TODO" list
+
+ o Fixed various typos
+
+
+
+Fri Jul 14 1995 Gisle Aas <aas@oslonett.no>
+
+ o Reorganized directories. Moved LWP.pm up. Moved file.pm and http.pm
+ into the LWP::Protocol directory. Moved LWP out of the lib directory
+ and removed lib.
+
+ o Implemented the "get" and "mirror" scripts in the "bin" directory.
+
+ o Implemented "install-lwp", "update_version" and "make-dist". The library
+ version number is found in the VERSION file.
+
+ o Always adds 1900 to the year in LWP::Date
+
+ o In LWP::MIMEheader: Implemented clone(), removeHeader() and scan()
+ methods. Reimplemented asString. Removed asMIME(). Moved "Good
+ Practice" into this file, and reimplemented it.
+
+ o Moved "header" and "content" into LWP::Message class. This change made
+ LWP::Request and LWP::Response much simpler. Made clone() method
+ actually work.
+
+ o LWP::Protocol::implementor does not die if it cannot load package.
+
+ o Moved UserAgent convenience methods into LWP::Simple. Made LWP::Simple
+ export LWP::StatusCode symbols and functions.
+
+ o Implemented $ua->isProtocolSupported($scheme) method.
+
+ o Nicer directory listing in LWP::Protocol::file.pm
+
+ o Rely on URI::URL 3.00 behaviour for $url->full_path
+
+ o Library version number now in LWP.pm. You should be able to say
+ "use LWP 1.0;" if you need at least this version.
+
+ o Various cleanups and arranging layout as I like it. Use fooBar-style
+ (as opposed to foo_bar style) everywhere. This means that as_string()
+ now is called asString().
+
+ o Added more documentation.
+
+
+
+Wed Jun 14 1995 Gisle Aas <aas@oslonett.no>
+
+ o Removed lot of redundant & before function calls.
+
+ o $this --> $self
+
+ o &collector passes content as a reference, don't want to copy so much
+
+ o parameterlist to collect callback has been rearranged
+
+ o Response::addContent gets a reference to the data
+
+ o Added some callback documentation to UserAgent.pm
+
+ o Protocol::file now uses the collector
+
+ o Introduced LWP::Simple
+
+
+
+Sun Jun 11 1995 Martijn Koster <m.koster@nexor.co.uk>
+
+ o Added basic authentication support
+
+ o Added mirroring of single documents
+
+ o Change Protocol construction from whacky URL.pm (constructor returns
+ subclass) to more normal C++'ish way.
+
+
+
+Wed June 7 1995 Martijn Koster <m.koster@nexor.co.uk>
+
+ o Minor cleanups from printed code inspection
+
+
+
+Wed May 24 1995 Martijn Koster <m.koster@nexor.co.uk>
+
+ o Added redirection resolution
+
+ o Added optional autoloading of protocols
+
+
+
+Tue May 23 1995 Martijn Koster <m.koster@nexor.co.uk>
+
+ o Separated socket stuff into separate module
+
+ o Added HTTP proxy support
+
+ o Made alarm handling optional
+
+ o Added a LWP::Message for completeness sake
+
+ o Added LWP::MemberMixin to reduce code duplication
+
+ o Cosmetic changes to LWP::Date
+
+ o Renamed LWP::Error to LWP::StatusCode
+
+ o Renamed LWP::MIME to LWP::MIMEtype
+
+ o Changed the tests to cope with all this
+
+It's getting there...
+
+
+
+Mon May 22 1995 Martijn Koster <m.koster@nexor.co.uk>
+
+ o Changed the socket reading to use sysread. This will have to go
+ into a module of its own.
+
+
+
+Thu 18 May 1995 Martijn Koster <m.koster@nexor.co.uk>
+
+ o Mentioned on libwww-perl that I had changed the classes around lots.
--- /dev/null
+AUTHORS Who made this
+Changes History of this package
+MANIFEST This file
+Makefile.PL Makefile generator
+README Get you started with this package
+README.SSL When you need SSL support
+bin/lwp-download Writes bin/lwp-download script
+bin/lwp-dump Writes bin/lwp-dump script
+bin/lwp-mirror Writes bin/lwp-mirror script
+bin/lwp-request Writes bin/lwp-request script
+bin/lwp-rget Writes bin/lwp-rget script
+talk-to-ourself Are we able to run tests talk HTTP to local server?
+lib/Bundle/LWP.pm Defines prereq modules
+lib/File/Listing.pm Parse directory listings
+lib/HTML/Form.pm Parse <form>...</form>
+lib/HTTP/Config.pm Look up stuff based on request/reponse properties
+lib/HTTP/Cookies.pm Cookie storage and management
+lib/HTTP/Cookies/Netscape.pm Deal with the Netscape cookie file format
+lib/HTTP/Cookies/Microsoft.pm Deal with the Microsoft MSIE cookie file format
+lib/HTTP/Daemon.pm A simple httpd
+lib/HTTP/Date.pm Date conversion routines
+lib/HTTP/Headers.pm Class encapsulating HTTP Message headers
+lib/HTTP/Headers/Auth.pm Some methods that deal with authorization.
+lib/HTTP/Headers/ETag.pm Some methods that deal with entity tags
+lib/HTTP/Headers/Util.pm Some utility functions for header values.
+lib/HTTP/Message.pm Class encapsulating HTTP messages
+lib/HTTP/Negotiate.pm Evaluate HTTP content negotiation algoritm
+lib/HTTP/Request.pm Class encapsulating HTTP Requests
+lib/HTTP/Request/Common.pm Generate common requests
+lib/HTTP/Response.pm Class encapsulating HTTP Responses
+lib/HTTP/Status.pm HTTP Status code processing
+lib/LWP.pm Includes what you need
+lib/LWP/Authen/Basic.pm Basic authentication scheme
+lib/LWP/Authen/Digest.pm Digest authentication scheme
+lib/LWP/Authen/Ntlm.pm NTLM authentication (Microsoft)
+lib/LWP/ConnCache.pm Connection cache
+lib/LWP/Debug.pm Debugging support
+lib/LWP/DebugFile.pm Send debug output to a file
+lib/LWP/MediaTypes.pm Library for guessing media types
+lib/LWP/MemberMixin.pm Helps you access %$self
+lib/LWP/Protocol.pm Virtual base class for LWP protocols
+lib/LWP/Protocol/GHTTP.pm Alternative HTTP protocol handler
+lib/LWP/Protocol/cpan.pm Access to cpan URLs
+lib/LWP/Protocol/data.pm Access to data URLs
+lib/LWP/Protocol/file.pm Access local files
+lib/LWP/Protocol/ftp.pm Access with the FTP protocol
+lib/LWP/Protocol/gopher.pm Access with the Gopher protocol
+lib/LWP/Protocol/http.pm Access with HTTP/1.1 protocol
+lib/LWP/Protocol/http10.pm Access with HTTP/1.0 protocol
+lib/LWP/Protocol/https.pm Access with HTTP/1.1 protocol over SSL
+lib/LWP/Protocol/https10.pm Access with HTTP/1.0 protocol over SSL
+lib/LWP/Protocol/loopback.pm Returns request (like HTTP TRACE)
+lib/LWP/Protocol/mailto.pm Allows you to POST mail using sendmail
+lib/LWP/Protocol/nntp.pm Handles access to news: and nntp: URLs
+lib/LWP/Protocol/nogo.pm Denies all requests.
+lib/LWP/RobotUA.pm Easy creation of conforming robots
+lib/LWP/Simple.pm Procedural LWP interface
+lib/LWP/UserAgent.pm A WWW UserAgent class
+lib/LWP/media.types Mapping from file extentions media types
+lib/Net/HTTP.pm Lower level HTTP on an IO::Socket::INET
+lib/Net/HTTP/Methods.pm Lower level HTTP/1.1 protocol
+lib/Net/HTTP/NB.pm Non-blocking HTTP
+lib/Net/HTTPS.pm Lower level HTTP on an SSL socket
+lib/WWW/RobotRules.pm Parse robot.txt files
+lib/WWW/RobotRules/AnyDBM_File.pm Persistent robot rules
+lwpcook.pod Libwww-perl examples
+lwptut.pod Libwww-perl tutorial
+t/README How to run and set up tests
+t/TEST Run tests
+t/base/common-req.t Test HTTP::Request::Common module
+t/base/cookies.t Test HTTP::Cookies module
+t/base/date.t Test HTTP::Date module
+t/base/headers-auth.t Test HTTP::Headers::Auth module
+t/base/headers-etag.t Test HTTP::Headers::ETag module
+t/base/headers-util.t Test HTTP::Headers::Util module
+t/base/headers.t Test HTTP::Headers module
+t/base/http.t Test Net::HTTP::Methods module
+t/base/http-config.t Test HTTP::Config module
+t/base/listing.t Test File::Listing module
+t/base/mediatypes.t Test LWP::MediaTypes module
+t/base/message.t Test HTTP::Message
+t/base/message-charset.t Test HTTP::Message content_charset method
+t/base/message-old.t Test HTTP::Request/HTTP::Response
+t/base/message-parts.t Test HTTP::Message parts method
+t/base/negotiate.t Test HTTP::Negotiation module
+t/base/protocols.t Test protocol methods of LWP::UserAgent
+t/base/request.t Test additional HTTP::Request methods
+t/base/response.t Test additional HTTP::Response methods
+t/base/status.t Test HTTP::Status module
+t/base/status-old.t Test HTTP::Status module
+t/base/ua.t Basic LWP::UserAgent tests
+t/html/form.t Test HTML::Form module
+t/html/form-param.t More HTML::Form tests.
+t/html/form-multi-select.t More HTML::Form tests
+t/html/form-maxlength.t More HTML::Form tests
+t/html/form-selector.t More HTML::Form tests
+t/live/apache.t
+t/live/apache-listing.t Test File::Listing::apache package
+t/live/https.t
+t/live/jigsaw-auth-b.t
+t/live/jigsaw-auth-d.t
+t/live/jigsaw-chunk.t
+t/live/jigsaw-md5-get.t
+t/live/jigsaw-md5.t
+t/live/jigsaw-neg-get.t
+t/live/jigsaw-neg.t
+t/live/jigsaw-te.t
+t/local/autoload-get.t
+t/local/autoload.t Test autoloading of LWP::Protocol modules
+t/local/chunked.t
+t/local/get.t Try to get a local file
+t/local/http.t Test http to local server
+t/local/protosub.t Test with other protocol module
+t/net/cgi-bin/moved
+t/net/cgi-bin/nph-slowdata
+t/net/cgi-bin/slowread
+t/net/cgi-bin/test
+t/net/cgi-bin/timeout
+t/net/config.pl.dist Suggested configuration for net tests
+t/net/http-get.t
+t/net/http-post.t
+t/net/http-timeout.t
+t/net/mirror.t
+t/net/moved.t
+t/net/proxy.t
+t/robot/rules-dbm.t Test WWW::RobotRules::AnyDBM_File
+t/robot/rules.t Test WWW::RobotRules
+t/robot/ua-get.t
+t/robot/ua.t Test LWP::RobotUA
+META.yml Module meta-data (added by MakeMaker)
--- /dev/null
+--- #YAML:1.0
+name: libwww-perl
+version: 5.836
+abstract: The World-Wide Web library for Perl
+author:
+ - Gisle Aas <gisle@activestate.com>
+license: perl
+distribution_type: module
+configure_requires:
+ ExtUtils::MakeMaker: 0
+build_requires:
+ ExtUtils::MakeMaker: 0
+requires:
+ Compress::Raw::Zlib: 0
+ Digest::MD5: 0
+ HTML::Parser: 3.33
+ HTML::Tagset: 0
+ IO::Compress::Deflate: 0
+ IO::Compress::Gzip: 0
+ IO::Uncompress::Gunzip: 0
+ IO::Uncompress::Inflate: 0
+ IO::Uncompress::RawInflate: 0
+ MIME::Base64: 2.1
+ Net::FTP: 2.58
+ perl: 5.006
+ URI: 1.10
+resources:
+ MailingList: mailto:libwww@perl.org
+ repository: http://github.com/gisle/libwww-perl
+no_index:
+ directory:
+ - t
+ - inc
+generated_by: ExtUtils::MakeMaker version 6.55_02
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
+recommends:
+ Crypt::SSLeay: 0
--- /dev/null
+#!perl -w
+
+require 5.006;
+use strict;
+use ExtUtils::MakeMaker;
+use Getopt::Long qw(GetOptions);
+
+GetOptions(\my %opt,
+ 'aliases',
+ 'no-programs|n',
+ 'live-tests',
+) or do {
+ die "Usage: $0 [--aliases] [--no-programs] [--live-tests]\n";
+};
+
+my @prog;
+push(@prog, qw(lwp-request lwp-mirror lwp-rget lwp-download lwp-dump))
+ unless $opt{'no-programs'} || grep /^LIB=/, @ARGV;
+
+if ($opt{'aliases'} && grep(/lwp-request/, @prog)) {
+ require File::Copy;
+ for (qw(GET HEAD POST)) {
+ File::Copy::copy("bin/lwp-request", "bin/$_") || die "Can't copy bin/$_";
+ chmod(0755, "bin/$_");
+ push(@prog, $_);
+ }
+}
+
+system($^X, "talk-to-ourself");
+flag_file("t/CAN_TALK_TO_OURSELF", $? == 0);
+flag_file("t/live/ENABLED", $opt{'live-tests'});
+
+WriteMakefile(
+ NAME => 'LWP',
+ DISTNAME => 'libwww-perl',
+ VERSION_FROM => 'lib/LWP.pm',
+ ABSTRACT => 'The World-Wide Web library for Perl',
+ AUTHOR => 'Gisle Aas <gisle@activestate.com>',
+ EXE_FILES => [ map "bin/$_", @prog ],
+ LICENSE => "perl",
+ MIN_PERL_VERSION => 5.006,
+ PREREQ_PM => {
+ 'URI' => "1.10",
+ 'MIME::Base64' => "2.1",
+ 'Net::FTP' => "2.58",
+ 'HTML::Tagset' => 0,
+ 'HTML::Parser' => "3.33",
+ 'Digest::MD5' => 0,
+ 'Compress::Raw::Zlib' => 0,
+ 'IO::Compress::Gzip' => 0,
+ 'IO::Compress::Deflate' => 0,
+ 'IO::Uncompress::Gunzip' => 0,
+ 'IO::Uncompress::Inflate' => 0,
+ 'IO::Uncompress::RawInflate' => 0,
+ },
+ META_MERGE => {
+ recommends => {
+ 'Crypt::SSLeay' => 0,
+ },
+ resources => {
+ repository => 'http://github.com/gisle/libwww-perl',
+ MailingList => 'mailto:libwww@perl.org',
+ }
+ },
+ clean => { FILES => join(" ", map "bin/$_", grep /^[A-Z]+$/, @prog) },
+);
+
+if($] >= 5.008 && !(eval { require Encode; defined(Encode::decode("UTF-8", "\xff")) })) {
+ warn "\nYou lack a working Encode module, and so you will miss out on\n".
+ "lots of character set goodness from LWP. However, your perl is\n".
+ "sufficiently recent to support it. It is recommended that you\n".
+ "install the latest Encode from CPAN.\n\n";
+}
+
+
+
+sub MY::test
+{
+ q(
+TEST_VERBOSE=0
+
+test : pure_all
+ $(FULLPERL) t/TEST $(TEST_VERBOSE)
+
+test_hudson : pure_all
+ $(FULLPERL) t/TEST $(TEST_VERBOSE) --formatter=TAP::Formatter::JUnit
+
+);
+}
+
+
+sub flag_file {
+ my($file, $create) = @_;
+ if ($create) {
+ open(my $fh, ">", $file) || die "Can't create $file: $!";
+ }
+ else {
+ unlink($file);
+ }
+}
+
+BEGIN {
+ # compatibility with older versions of MakeMaker
+ my $developer = -f "NOTES.txt";
+ my %mm_req = (
+ LICENCE => 6.31,
+ META_MERGE => 6.45,
+ META_ADD => 6.45,
+ MIN_PERL_VERSION => 6.48,
+ );
+ undef(*WriteMakefile);
+ *WriteMakefile = sub {
+ my %arg = @_;
+ for (keys %mm_req) {
+ unless (eval { ExtUtils::MakeMaker->VERSION($mm_req{$_}) }) {
+ warn "$_ $@" if $developer;
+ delete $arg{$_};
+ }
+ }
+ ExtUtils::MakeMaker::WriteMakefile(%arg);
+ };
+}
--- /dev/null
+
+ L I B W W W - P E R L - 5
+ -----------------------------
+
+
+The libwww-perl collection is a set of Perl modules which provides a
+simple and consistent application programming interface to the
+World-Wide Web. The main focus of the library is to provide classes
+and functions that allow you to write WWW clients. The library also
+contain modules that are of more general use and even classes that
+help you implement simple HTTP servers.
+
+
+PREREQUISITES
+
+In order to install and use this package you will need Perl version
+5.6 or better. Some modules within this package depend on other
+packages that are distributed separately from Perl. We recommend that
+you have the following packages installed before you install
+libwww-perl:
+
+ URI
+ MIME-Base64
+ HTML-Tagset
+ HTML-Parser
+ libnet
+ Digest-MD5
+ Compress-Zlib
+
+If you want to access sites using the https protocol, then you need to
+install the Crypt::SSLeay or the IO::Socket::SSL module. The
+README.SSL file will tell you more about how libwww-perl supports SSL.
+
+
+INSTALLATION
+
+You install libwww-perl using the normal perl module distribution drill:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+If you don't want to install any programs (only the library files) then
+pass the '--no-programs' option to Makefile.PL:
+
+ perl Makefile.PL --no-programs
+
+
+DOCUMENTATION
+
+See the lib/LWP.pm file for an overview of the library. See the
+Changes file for recent changes.
+
+POD style documentation is included in all modules and scripts. These
+are normally converted to manual pages and installed as part of the
+"make install" process. You should also be able to use the 'perldoc'
+utility to extract and read documentation from the module files
+directly.
+
+
+SUPPORT
+
+Bug reports and suggestions for improvements can be sent to the
+<libwww@perl.org> mailing list. This mailing list is also the place
+for general discussions and development of the libwww-perl package.
+
+
+AVAILABILITY
+
+The latest version of libwww-perl is available from CPAN:
+
+ http://search.cpan.org/dist/libwww-perl/
+
+If you want to hack on the source it might be a good idea to grab the
+latest version with git using the command:
+
+ git clone git://github.com/gisle/libwww-perl.git lwp
+
+You can also browse the git repository at:
+
+ http://github.com/gisle/libwww-perl
+
+
+COPYRIGHT
+
+ © 1995-2009 Gisle Aas. All rights reserved.
+ © 1995 Martijn Koster. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+Enjoy!
--- /dev/null
+SSL SUPPORT
+-----------
+
+The libwww-perl package has support for using SSL/TLSv1 with its HTTP
+client and server classes. This support makes it possible to access
+https schemed URLs with LWP. Because of the problematic status of
+encryption software in general and certain encryption algorithms in
+particular, in several countries, libwww-perl package doesn't include
+SSL functionality out-of-the-box.
+
+Encryption support is obtained through the use of Crypt::SSLeay or
+IO::Socket::SSL, which can both be found from CPAN. While libwww-perl
+has "plug-and-play" support for both of these modules (as of v5.45),
+the recommended module to use is Crypt::SSLeay. In addition to
+bringing SSL support to the LWP package, IO::Socket::SSL can be used
+as an object oriented interface to SSL encrypted network sockets.
+
+There is yet another SSL interface for perl called Net::SSLeay. It has
+a more complete SSL interface and can be used for web client
+programming among other things but doesn't directly support LWP.
+
+The underlying SSL support in all of these modules is based on OpenSSL
+<http://www.openssl.org/> (formerly SSLeay). For WWW-server side SSL
+support (e.g. CGI/FCGI scripts) in Apache see <http://www.modssl.org/>.
--- /dev/null
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+lwp-download - Fetch large files from the web
+
+=head1 SYNOPSIS
+
+B<lwp-download> [B<-a>] [B<-s>] <I<url>> [<I<local path>>]
+
+=head1 DESCRIPTION
+
+The B<lwp-download> program will save the file at I<url> to a local
+file.
+
+If I<local path> is not specified, then the current directory is
+assumed.
+
+If I<local path> is a directory, then the last segment of the path of the
+I<url> is appended to form a local filename. If the I<url> path ends with
+slash the name "index" is used. With the B<-s> option pick up the last segment
+of the filename from server provided sources like the Content-Disposition
+header or any redirect URLs. A file extension to match the server reported
+Content-Type might also be appended. If a file with the produced filename
+already exists, then B<lwp-download> will prompt before it overwrites and will
+fail if its standard input is not a terminal. This form of invocation will
+also fail is no acceptable filename can be derived from the sources mentioned
+above.
+
+If I<local path> is not a directory, then it is simply used as the
+path to save into. If the file already exists it's overwritten.
+
+The I<lwp-download> program is implemented using the I<libwww-perl>
+library. It is better suited to down load big files than the
+I<lwp-request> program because it does not store the file in memory.
+Another benefit is that it will keep you updated about its progress
+and that you don't have much options to worry about.
+
+Use the C<-a> option to save the file in text (ascii) mode. Might
+make a difference on dosish systems.
+
+=head1 EXAMPLE
+
+Fetch the newest and greatest perl version:
+
+ $ lwp-download http://www.perl.com/CPAN/src/latest.tar.gz
+ Saving to 'latest.tar.gz'...
+ 11.4 MB received in 8 seconds (1.43 MB/sec)
+
+=head1 AUTHOR
+
+Gisle Aas <gisle@aas.no>
+
+=cut
+
+#' get emacs out of quote mode
+
+use strict;
+
+use LWP::UserAgent ();
+use LWP::MediaTypes qw(guess_media_type media_suffix);
+use URI ();
+use HTTP::Date ();
+
+my $progname = $0;
+$progname =~ s,.*/,,; # only basename left in progname
+$progname =~ s,.*\\,, if $^O eq "MSWin32";
+$progname =~ s/\.\w*$//; # strip extension if any
+
+#parse option
+use Getopt::Std;
+my %opt;
+unless (getopts('as', \%opt)) {
+ usage();
+}
+
+my $url = URI->new(shift || usage());
+my $argfile = shift;
+usage() if defined($argfile) && !length($argfile);
+my $VERSION = "5.835";
+
+my $ua = LWP::UserAgent->new(
+ agent => "lwp-download/$VERSION ",
+ keep_alive => 1,
+ env_proxy => 1,
+);
+
+my $file; # name of file we download into
+my $length; # total number of bytes to download
+my $flength; # formatted length
+my $size = 0; # number of bytes received
+my $start_t; # start time of download
+my $last_dur; # time of last callback
+
+my $shown = 0; # have we called the show() function yet
+
+$SIG{INT} = sub { die "Interrupted\n"; };
+
+$| = 1; # autoflush
+
+my $res = $ua->request(HTTP::Request->new(GET => $url),
+ sub {
+ unless(defined $file) {
+ my $res = $_[1];
+
+ my $directory;
+ if (defined $argfile && -d $argfile) {
+ ($directory, $argfile) = ($argfile, undef);
+ }
+
+ unless (defined $argfile) {
+ # find a suitable name to use
+ $file = $opt{s} && $res->filename;
+
+ # if this fails we try to make something from the URL
+ unless ($file) {
+ $file = ($url->path_segments)[-1];
+ if (!defined($file) || !length($file)) {
+ $file = "index";
+ my $suffix = media_suffix($res->content_type);
+ $file .= ".$suffix" if $suffix;
+ }
+ elsif ($url->scheme eq 'ftp' ||
+ $file =~ /\.t[bg]z$/ ||
+ $file =~ /\.tar(\.(Z|gz|bz2?))?$/
+ ) {
+ # leave the filename as it was
+ }
+ else {
+ my $ct = guess_media_type($file);
+ unless ($ct eq $res->content_type) {
+ # need a better suffix for this type
+ my $suffix = media_suffix($res->content_type);
+ $file .= ".$suffix" if $suffix;
+ }
+ }
+ }
+
+ # validate that we don't have a harmful filename now. The server
+ # might try to trick us into doing something bad.
+ if (!length($file) ||
+ $file =~ s/([^a-zA-Z0-9_\.\-\+\~])/sprintf "\\x%02x", ord($1)/ge ||
+ $file =~ /^\./
+ )
+ {
+ die "Will not save <$url> as \"$file\".\nPlease override file name on the command line.\n";
+ }
+
+ if (defined $directory) {
+ require File::Spec;
+ $file = File::Spec->catfile($directory, $file);
+ }
+
+ # Check if the file is already present
+ if (-l $file) {
+ die "Will not save <$url> to link \"$file\".\nPlease override file name on the command line.\n";
+ }
+ elsif (-f _) {
+ die "Will not save <$url> as \"$file\" without verification.\nEither run from terminal or override file name on the command line.\n"
+ unless -t;
+ $shown = 1;
+ print "Overwrite $file? [y] ";
+ my $ans = <STDIN>;
+ unless (defined($ans) && $ans =~ /^y?\n/) {
+ if (defined $ans) {
+ print "Ok, aborting.\n";
+ }
+ else {
+ print "\nAborting.\n";
+ }
+ exit 1;
+ }
+ $shown = 0;
+ }
+ elsif (-e _) {
+ die "Will not save <$url> as \"$file\". Path exists.\n";
+ }
+ else {
+ print "Saving to '$file'...\n";
+ use Fcntl qw(O_WRONLY O_EXCL O_CREAT);
+ sysopen(FILE, $file, O_WRONLY|O_EXCL|O_CREAT) ||
+ die "Can't open $file: $!";
+ }
+ }
+ else {
+ $file = $argfile;
+ }
+ unless (fileno(FILE)) {
+ open(FILE, ">", $file) || die "Can't open $file: $!\n";
+ }
+ binmode FILE unless $opt{a};
+ $length = $res->content_length;
+ $flength = fbytes($length) if defined $length;
+ $start_t = time;
+ $last_dur = 0;
+ }
+
+ print FILE $_[0] or die "Can't write to $file: $!\n";
+ $size += length($_[0]);
+
+ if (defined $length) {
+ my $dur = time - $start_t;
+ if ($dur != $last_dur) { # don't update too often
+ $last_dur = $dur;
+ my $perc = $size / $length;
+ my $speed;
+ $speed = fbytes($size/$dur) . "/sec" if $dur > 3;
+ my $secs_left = fduration($dur/$perc - $dur);
+ $perc = int($perc*100);
+ my $show = "$perc% of $flength";
+ $show .= " (at $speed, $secs_left remaining)" if $speed;
+ show($show, 1);
+ }
+ }
+ else {
+ show( fbytes($size) . " received");
+ }
+ }
+);
+
+if (fileno(FILE)) {
+ close(FILE) || die "Can't write to $file: $!\n";
+
+ show(""); # clear text
+ print "\r";
+ print fbytes($size);
+ print " of ", fbytes($length) if defined($length) && $length != $size;
+ print " received";
+ my $dur = time - $start_t;
+ if ($dur) {
+ my $speed = fbytes($size/$dur) . "/sec";
+ print " in ", fduration($dur), " ($speed)";
+ }
+ print "\n";
+
+ if (my $mtime = $res->last_modified) {
+ utime time, $mtime, $file;
+ }
+
+ if ($res->header("X-Died") || !$res->is_success) {
+ if (my $died = $res->header("X-Died")) {
+ print "$died\n";
+ }
+ if (-t) {
+ print "Transfer aborted. Delete $file? [n] ";
+ my $ans = <STDIN>;
+ if (defined($ans) && $ans =~ /^y\n/) {
+ unlink($file) && print "Deleted.\n";
+ }
+ elsif ($length > $size) {
+ print "Truncated file kept: ", fbytes($length - $size), " missing\n";
+ }
+ else {
+ print "File kept.\n";
+ }
+ exit 1;
+ }
+ else {
+ print "Transfer aborted, $file kept\n";
+ }
+ }
+ exit 0;
+}
+
+# Did not manage to create any file
+print "\n" if $shown;
+if (my $xdied = $res->header("X-Died")) {
+ print "$progname: Aborted\n$xdied\n";
+}
+else {
+ print "$progname: ", $res->status_line, "\n";
+}
+exit 1;
+
+
+sub fbytes
+{
+ my $n = int(shift);
+ if ($n >= 1024 * 1024) {
+ return sprintf "%.3g MB", $n / (1024.0 * 1024);
+ }
+ elsif ($n >= 1024) {
+ return sprintf "%.3g KB", $n / 1024.0;
+ }
+ else {
+ return "$n bytes";
+ }
+}
+
+sub fduration
+{
+ use integer;
+ my $secs = int(shift);
+ my $hours = $secs / (60*60);
+ $secs -= $hours * 60*60;
+ my $mins = $secs / 60;
+ $secs %= 60;
+ if ($hours) {
+ return "$hours hours $mins minutes";
+ }
+ elsif ($mins >= 2) {
+ return "$mins minutes";
+ }
+ else {
+ $secs += $mins * 60;
+ return "$secs seconds";
+ }
+}
+
+
+BEGIN {
+ my @ani = qw(- \ | /);
+ my $ani = 0;
+
+ sub show
+ {
+ my($mess, $show_ani) = @_;
+ print "\r$mess" . (" " x (75 - length $mess));
+ print $show_ani ? "$ani[$ani++]\b" : " ";
+ $ani %= @ani;
+ $shown++;
+ }
+}
+
+sub usage
+{
+ die "Usage: $progname [-a] <url> [<lpath>]\n";
+}
--- /dev/null
+#!/usr/bin/perl -w
+
+use strict;
+use LWP::UserAgent ();
+use Getopt::Long qw(GetOptions);
+
+my $VERSION = "5.827";
+
+GetOptions(\my %opt,
+ 'parse-head',
+ 'max-length=n',
+ 'keep-client-headers',
+ 'method=s',
+ 'agent=s',
+) || usage();
+
+my $url = shift || usage();
+@ARGV && usage();
+
+sub usage {
+ (my $progname = $0) =~ s,.*/,,;
+ die <<"EOT";
+Usage: $progname [options] <url>
+
+Recognized options are:
+ --agent <str>
+ --keep-client-headers
+ --max-length <n>
+ --method <str>
+ --parse-head
+
+EOT
+}
+
+my $ua = LWP::UserAgent->new(
+ parse_head => $opt{'parse-head'} || 0,
+ keep_alive => 1,
+ env_proxy => 1,
+ agent => $opt{agent} || "lwp-dump/$VERSION ",
+);
+
+my $req = HTTP::Request->new($opt{method} || 'GET' => $url);
+my $res = $ua->simple_request($req);
+$res->remove_header(grep /^Client-/, $res->header_field_names)
+ unless $opt{'keep-client-headers'} or
+ ($res->header("Client-Warning") || "") eq "Internal response";
+
+$res->dump(maxlength => $opt{'max-length'});
+
+__END__
+
+=head1 NAME
+
+lwp-dump - See what headers and content is returned for a URL
+
+=head1 SYNOPSIS
+
+B<lwp-dump> [ I<options> ] I<URL>
+
+=head1 DESCRIPTION
+
+The B<lwp-dump> program will get the resource indentified by the URL and then
+dump the response object to STDOUT. This will display the headers returned and
+the initial part of the content, escaped so that it's safe to display even
+binary content. The escapes syntax used is the same as for Perl's double
+quoted strings. If there is no content the string "(no content)" is shown in
+its place.
+
+The following options are recognized:
+
+=over
+
+=item B<--agent> I<str>
+
+Override the user agent string passed to the server.
+
+=item B<--keep-client-headers>
+
+LWP internally generate various C<Client-*> headers that are stripped by
+B<lwp-dump> in order to show the headers exactly as the server provided them.
+This option will suppress this.
+
+=item B<--max-length> I<n>
+
+How much of the content to show. The default is 512. Set this
+to 0 for unlimited.
+
+If the content is longer then the string is chopped at the
+limit and the string "...\n(### more bytes not shown)"
+appended.
+
+=item B<--method> I<str>
+
+Use the given method for the request instead of the default "GET".
+
+=item B<--parse-head>
+
+By default B<lwp-dump> will not try to initialize headers by looking at the
+head section of HTML documents. This option enables this. This corresponds to
+L<LWP::UserAgent/"parse_head">.
+
+=back
+
+=head1 SEE ALSO
+
+L<lwp-request>, L<LWP>, L<HTTP::Message/"dump">
+
--- /dev/null
+#!/usr/bin/perl -w
+
+# Simple mirror utility using LWP
+
+=head1 NAME
+
+lwp-mirror - Simple mirror utility
+
+=head1 SYNOPSIS
+
+ lwp-mirror [-v] [-t timeout] <url> <local file>
+
+=head1 DESCRIPTION
+
+This program can be used to mirror a document from a WWW server. The
+document is only transfered if the remote copy is newer than the local
+copy. If the local copy is newer nothing happens.
+
+Use the C<-v> option to print the version number of this program.
+
+The timeout value specified with the C<-t> option. The timeout value
+is the time that the program will wait for response from the remote
+server before it fails. The default unit for the timeout value is
+seconds. You might append "m" or "h" to the timeout value to make it
+minutes or hours, respectively.
+
+Because this program is implemented using the LWP library, it only
+supports the protocols that LWP supports.
+
+=head1 SEE ALSO
+
+L<lwp-request>, L<LWP>
+
+=head1 AUTHOR
+
+Gisle Aas <gisle@aas.no>
+
+=cut
+
+
+use LWP::Simple qw(mirror is_success status_message $ua);
+use Getopt::Std;
+
+$progname = $0;
+$progname =~ s,.*/,,; # use basename only
+$progname =~ s/\.\w*$//; #strip extension if any
+
+$VERSION = "5.810";
+
+$opt_h = undef; # print usage
+$opt_v = undef; # print version
+$opt_t = undef; # timeout
+
+unless (getopts("hvt:")) {
+ usage();
+}
+
+if ($opt_v) {
+ require LWP;
+ my $DISTNAME = 'libwww-perl-' . LWP::Version();
+ die <<"EOT";
+This is lwp-mirror version $VERSION ($DISTNAME)
+
+Copyright 1995-1999, Gisle Aas.
+
+This program is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+EOT
+}
+
+$url = shift or usage();
+$file = shift or usage();
+usage() if $opt_h or @ARGV;
+
+if (defined $opt_t) {
+ $opt_t =~ /^(\d+)([smh])?/;
+ die "$progname: Illegal timeout value!\n" unless defined $1;
+ $timeout = $1;
+ $timeout *= 60 if ($2 eq "m");
+ $timeout *= 3600 if ($2 eq "h");
+ $ua->timeout($timeout);
+}
+
+$rc = mirror($url, $file);
+
+if ($rc == 304) {
+ print STDERR "$progname: $file is up to date\n"
+}
+elsif (!is_success($rc)) {
+ print STDERR "$progname: $rc ", status_message($rc), " ($url)\n";
+ exit 1;
+}
+exit;
+
+
+sub usage
+{
+ die <<"EOT";
+Usage: $progname [-options] <url> <file>
+ -v print version number of program
+ -t <timeout> Set timeout value
+EOT
+}
--- /dev/null
+#!/usr/bin/perl -w
+
+# Simple user agent using LWP library.
+
+=head1 NAME
+
+lwp-request, GET, POST, HEAD - Simple command line user agent
+
+=head1 SYNOPSIS
+
+B<lwp-request> [B<-afPuUsSedvhx>] [B<-m> I<method>] [B<-b> I<base URL>] [B<-t> I<timeout>]
+ [B<-i> I<if-modified-since>] [B<-c> I<content-type>]
+ [B<-C> I<credentials>] [B<-p> I<proxy-url>] [B<-o> I<format>] I<url>...
+
+=head1 DESCRIPTION
+
+This program can be used to send requests to WWW servers and your
+local file system. The request content for POST and PUT
+methods is read from stdin. The content of the response is printed on
+stdout. Error messages are printed on stderr. The program returns a
+status value indicating the number of URLs that failed.
+
+The options are:
+
+=over 4
+
+=item -m <method>
+
+Set which method to use for the request. If this option is not used,
+then the method is derived from the name of the program.
+
+=item -f
+
+Force request through, even if the program believes that the method is
+illegal. The server might reject the request eventually.
+
+=item -b <uri>
+
+This URI will be used as the base URI for resolving all relative URIs
+given as argument.
+
+=item -t <timeout>
+
+Set the timeout value for the requests. The timeout is the amount of
+time that the program will wait for a response from the remote server
+before it fails. The default unit for the timeout value is seconds.
+You might append "m" or "h" to the timeout value to make it minutes or
+hours, respectively. The default timeout is '3m', i.e. 3 minutes.
+
+=item -i <time>
+
+Set the If-Modified-Since header in the request. If I<time> is the
+name of a file, use the modification timestamp for this file. If
+I<time> is not a file, it is parsed as a literal date. Take a look at
+L<HTTP::Date> for recognized formats.
+
+=item -c <content-type>
+
+Set the Content-Type for the request. This option is only allowed for
+requests that take a content, i.e. POST and PUT. You can
+force methods to take content by using the C<-f> option together with
+C<-c>. The default Content-Type for POST is
+C<application/x-www-form-urlencoded>. The default Content-type for
+the others is C<text/plain>.
+
+=item -p <proxy-url>
+
+Set the proxy to be used for the requests. The program also loads
+proxy settings from the environment. You can disable this with the
+C<-P> option.
+
+=item -P
+
+Don't load proxy settings from environment.
+
+=item -H <header>
+
+Send this HTTP header with each request. You can specify several, e.g.:
+
+ lwp-request \
+ -H 'Referer: http://other.url/' \
+ -H 'Host: somehost' \
+ http://this.url/
+
+=item -C <username>:<password>
+
+Provide credentials for documents that are protected by Basic
+Authentication. If the document is protected and you did not specify
+the username and password with this option, then you will be prompted
+to provide these values.
+
+=back
+
+The following options controls what is displayed by the program:
+
+=over 4
+
+=item -u
+
+Print request method and absolute URL as requests are made.
+
+=item -U
+
+Print request headers in addition to request method and absolute URL.
+
+=item -s
+
+Print response status code. This option is always on for HEAD requests.
+
+=item -S
+
+Print response status chain. This shows redirect and authorization
+requests that are handled by the library.
+
+=item -e
+
+Print response headers. This option is always on for HEAD requests.
+
+=item -d
+
+Do B<not> print the content of the response.
+
+=item -o <format>
+
+Process HTML content in various ways before printing it. If the
+content type of the response is not HTML, then this option has no
+effect. The legal format values are; I<text>, I<ps>, I<links>,
+I<html> and I<dump>.
+
+If you specify the I<text> format then the HTML will be formatted as
+plain latin1 text. If you specify the I<ps> format then it will be
+formatted as Postscript.
+
+The I<links> format will output all links found in the HTML document.
+Relative links will be expanded to absolute ones.
+
+The I<html> format will reformat the HTML code and the I<dump> format
+will just dump the HTML syntax tree.
+
+Note that the C<HTML-Tree> distribution needs to be installed for this
+option to work. In addition the C<HTML-Format> distribution needs to
+be installed for I<-o text> or I<-o ps> to work.
+
+=item -v
+
+Print the version number of the program and quit.
+
+=item -h
+
+Print usage message and quit.
+
+=item -a
+
+Set text(ascii) mode for content input and output. If this option is not
+used, content input and output is done in binary mode.
+
+=back
+
+Because this program is implemented using the LWP library, it will
+only support the protocols that LWP supports.
+
+=head1 SEE ALSO
+
+L<lwp-mirror>, L<LWP>
+
+=head1 COPYRIGHT
+
+Copyright 1995-1999 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Gisle Aas <gisle@aas.no>
+
+=cut
+
+$progname = $0;
+$progname =~ s,.*[\\/],,; # use basename only
+$progname =~ s/\.\w*$//; # strip extension, if any
+
+$VERSION = "5.834";
+
+
+require LWP;
+
+use URI;
+use URI::Heuristic qw(uf_uri);
+
+use HTTP::Status qw(status_message);
+use HTTP::Date qw(time2str str2time);
+
+
+# This table lists the methods that are allowed. It should really be
+# a superset for all methods supported for every scheme that may be
+# supported by the library. Currently it might be a bit too HTTP
+# specific. You might use the -f option to force a method through.
+#
+# "" = No content in request, "C" = Needs content in request
+#
+%allowed_methods = (
+ GET => "",
+ HEAD => "",
+ POST => "C",
+ PUT => "C",
+ DELETE => "",
+ TRACE => "",
+ OPTIONS => "",
+);
+
+
+# We make our own specialization of LWP::UserAgent that asks for
+# user/password if document is protected.
+{
+ package RequestAgent;
+ @ISA = qw(LWP::UserAgent);
+
+ sub new
+ {
+ my $self = LWP::UserAgent::new(@_);
+ $self->agent("lwp-request/$main::VERSION ");
+ $self;
+ }
+
+ sub get_basic_credentials
+ {
+ my($self, $realm, $uri) = @_;
+ if ($main::options{'C'}) {
+ return split(':', $main::options{'C'}, 2);
+ }
+ elsif (-t) {
+ my $netloc = $uri->host_port;
+ print STDERR "Enter username for $realm at $netloc: ";
+ my $user = <STDIN>;
+ chomp($user);
+ return (undef, undef) unless length $user;
+ print STDERR "Password: ";
+ system("stty -echo");
+ my $password = <STDIN>;
+ system("stty echo");
+ print STDERR "\n"; # because we disabled echo
+ chomp($password);
+ return ($user, $password);
+ }
+ else {
+ return (undef, undef)
+ }
+ }
+}
+
+$method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname);
+
+# Parse command line
+use Getopt::Long;
+
+my @getopt_args = (
+ 'a', # content i/o in text(ascii) mode
+ 'm=s', # set method
+ 'f', # make request even if method is not in %allowed_methods
+ 'b=s', # base url
+ 't=s', # timeout
+ 'i=s', # if-modified-since
+ 'c=s', # content type for POST
+ 'C=s', # credentials for basic authorization
+ 'H=s@', # extra headers, form "Header: value string"
+ #
+ 'u', # display method, URL and headers of request
+ 'U', # display request headers also
+ 's', # display status code
+ 'S', # display whole chain of status codes
+ 'e', # display response headers (default for HEAD)
+ 'd', # don't display content
+ #
+ 'h', # print usage
+ 'v', # print version
+ #
+ 'p=s', # proxy URL
+ 'P', # don't load proxy setting from environment
+ #
+ 'o=s', # output format
+);
+
+Getopt::Long::config("noignorecase", "bundling");
+unless (GetOptions(\%options, @getopt_args)) {
+ usage();
+}
+if ($options{'v'}) {
+ require LWP;
+ my $DISTNAME = 'libwww-perl-' . LWP::Version();
+ die <<"EOT";
+This is lwp-request version $VERSION ($DISTNAME)
+
+Copyright 1995-1999, Gisle Aas.
+
+This program is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+EOT
+}
+
+usage() if $options{'h'} || !@ARGV;
+
+# Create the user agent object
+$ua = RequestAgent->new;
+
+# Load proxy settings from *_proxy environment variables.
+$ua->env_proxy unless $options{'P'};
+
+$method = uc($options{'m'}) if defined $options{'m'};
+
+if ($options{'f'}) {
+ if ($options{'c'}) {
+ $allowed_methods{$method} = "C"; # force content
+ }
+ else {
+ $allowed_methods{$method} = "";
+ }
+}
+elsif (!defined $allowed_methods{$method}) {
+ die "$progname: $method is not an allowed method\n";
+}
+
+if ($method eq "HEAD") {
+ $options{'s'} = 1;
+ $options{'e'} = 1 unless $options{'d'};
+ $options{'d'} = 1;
+}
+
+if (defined $options{'t'}) {
+ $options{'t'} =~ /^(\d+)([smh])?/;
+ die "$progname: Illegal timeout value!\n" unless defined $1;
+ $timeout = $1;
+ if (defined $2) {
+ $timeout *= 60 if $2 eq "m";
+ $timeout *= 3600 if $2 eq "h";
+ }
+ $ua->timeout($timeout);
+}
+
+if (defined $options{'i'}) {
+ if (-e $options{'i'}) {
+ $time = (stat _)[9];
+ }
+ else {
+ $time = str2time($options{'i'});
+ die "$progname: Illegal time syntax for -i option\n"
+ unless defined $time;
+ }
+ $options{'i'} = time2str($time);
+}
+
+$content = undef;
+if ($allowed_methods{$method} eq "C") {
+ # This request needs some content
+ unless (defined $options{'c'}) {
+ # set default content type
+ $options{'c'} = ($method eq "POST") ?
+ "application/x-www-form-urlencoded"
+ : "text/plain";
+ }
+ else {
+ die "$progname: Illegal Content-type format\n"
+ unless $options{'c'} =~ m,^[\w\-]+/[\w\-.+]+(?:\s*;.*)?$,
+ }
+ print STDERR "Please enter content ($options{'c'}) to be ${method}ed:\n"
+ if -t;
+ binmode STDIN unless -t or $options{'a'};
+ $content = join("", <STDIN>);
+}
+else {
+ die "$progname: Can't set Content-type for $method requests\n"
+ if defined $options{'c'};
+}
+
+# Set up a request. We will use the same request object for all URLs.
+$request = HTTP::Request->new($method);
+$request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'};
+for my $user_header (@{ $options{'H'} || [] }) {
+ my ($header_name, $header_value) = split /:\s*/, $user_header, 2;
+ $request->header($header_name, $header_value);
+ $ua->agent($header_value) if lc($header_name) eq "user-agent"; # Ugh!
+}
+#$request->header('Accept', '*/*');
+if ($options{'c'}) { # will always be set for request that wants content
+ $request->header('Content-Type', $options{'c'});
+ $request->header('Content-Length', length $content); # Not really needed
+ $request->content($content);
+}
+
+$errors = 0;
+
+# Ok, now we perform the requests, one URL at a time
+while ($url = shift) {
+ # Create the URL object, but protect us against bad URLs
+ eval {
+ if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification
+ $url = URI->new($url, $options{'b'});
+ $url = $url->abs($options{'b'}) if $options{'b'};
+ }
+ else {
+ $url = uf_uri($url);
+ }
+ };
+ if ($@) {
+ $@ =~ s/ at .* line \d+.*//;
+ print STDERR $@;
+ $errors++;
+ next;
+ }
+
+ $ua->proxy($url->scheme, $options{'p'}) if $options{'p'};
+
+ # Send the request and get a response back from the server
+ $request->uri($url);
+ $response = $ua->request($request);
+
+ if ($options{'u'} || $options{'U'}) {
+ my $url = $response->request->uri->as_string;
+ print "$method $url\n";
+ print $response->request->headers_as_string, "\n" if $options{'U'};
+ }
+
+ if ($options{'S'}) {
+ for my $r ($response->redirects, $response) {
+ my $method = $r->request->method;
+ my $url = $r->request->uri->as_string;
+ print "$method $url --> ", $r->status_line, "\n";
+ }
+ }
+ elsif ($options{'s'}) {
+ print $response->status_line, "\n";
+ }
+
+ if ($options{'e'}) {
+ # Display headers
+ print $response->headers_as_string;
+ print "\n"; # separate headers and content
+ }
+
+ unless ($options{'d'}) {
+ if ($options{'o'} &&
+ $response->content_type eq 'text/html') {
+ eval {
+ require HTML::Parse;
+ };
+ if ($@) {
+ if ($@ =~ m,^Can't locate HTML/Parse.pm in \@INC,) {
+ die "The HTML-Tree distribution need to be installed for the -o option to be used.\n";
+ }
+ else {
+ die $@;
+ }
+ }
+ my $html = HTML::Parse::parse_html($response->content);
+ {
+ $options{'o'} eq 'ps' && do {
+ require HTML::FormatPS;
+ my $f = HTML::FormatPS->new;
+ print $f->format($html);
+ last;
+ };
+ $options{'o'} eq 'text' && do {
+ require HTML::FormatText;
+ my $f = HTML::FormatText->new;
+ print $f->format($html);
+ last;
+ };
+ $options{'o'} eq 'html' && do {
+ print $html->as_HTML;
+ last;
+ };
+ $options{'o'} eq 'links' && do {
+ my $base = $response->base;
+ $base = $options{'b'} if $options{'b'};
+ for ( @{ $html->extract_links } ) {
+ my($link, $elem) = @$_;
+ my $tag = uc $elem->tag;
+ $link = URI->new($link)->abs($base)->as_string;
+ print "$tag\t$link\n";
+ }
+ last;
+ };
+ $options{'o'} eq 'dump' && do {
+ $html->dump;
+ last;
+ };
+ # It is bad to not notice this before now :-(
+ die "Illegal -o option value ($options{'o'})\n";
+ }
+ }
+ else {
+ binmode STDOUT unless $options{'a'};
+ print $response->content;
+ }
+ }
+
+ $errors++ unless $response->is_success;
+}
+
+exit $errors;
+
+
+sub usage
+{
+ die <<"EOT";
+Usage: $progname [-options] <url>...
+ -m <method> use method for the request (default is '$method')
+ -f make request even if $progname believes method is illegal
+ -b <base> Use the specified URL as base
+ -t <timeout> Set timeout value
+ -i <time> Set the If-Modified-Since header on the request
+ -c <conttype> use this content-type for POST, PUT, CHECKIN
+ -a Use text mode for content I/O
+ -p <proxyurl> use this as a proxy
+ -P don't load proxy settings from environment
+ -H <header> send this HTTP header (you can specify several)
+ -C <username>:<password>
+ provide credentials for basic authentication
+
+ -u Display method and URL before any response
+ -U Display request headers (implies -u)
+ -s Display response status code
+ -S Display response status chain
+ -e Display response headers
+ -d Do not display content
+ -o <format> Process HTML content in various ways
+
+ -v Show program version
+ -h Print this message
+EOT
+}
--- /dev/null
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+lwp-rget - Retrieve web documents recursively
+
+=head1 SYNOPSIS
+
+ lwp-rget [--verbose] [--auth=USER:PASS] [--depth=N] [--hier] [--iis]
+ [--keepext=mime/type[,mime/type]] [--limit=N] [--nospace]
+ [--prefix=URL] [--referer=URL] [--sleep=N] [--tolower] <URL>
+ lwp-rget --version
+
+=head1 DESCRIPTION
+
+This program will retrieve a document and store it in a local file. It
+will follow any links found in the document and store these documents
+as well, patching links so that they refer to these local copies.
+This process continues until there are no more unvisited links or the
+process is stopped by the one or more of the limits which can be
+controlled by the command line arguments.
+
+This program is useful if you want to make a local copy of a
+collection of documents or want to do web reading off-line.
+
+All documents are stored as plain files in the current directory. The
+file names chosen are derived from the last component of URL paths.
+
+The options are:
+
+=over 3
+
+=item --auth=USER:PASS<n>
+
+Set the authentication credentials to user "USER" and password "PASS" if
+any restricted parts of the web site are hit. If there are restricted
+parts of the web site and authentication credentials are not available,
+those pages will not be downloaded.
+
+=item --depth=I<n>
+
+Limit the recursive level. Embedded images are always loaded, even if
+they fall outside the I<--depth>. This means that one can use
+I<--depth=0> in order to fetch a single document together with all
+inline graphics.
+
+The default depth is 5.
+
+=item --hier
+
+Download files into a hierarchy that mimics the web site structure.
+The default is to put all files in the current directory.
+
+=item --referer=I<URI>
+
+Set the value of the Referer header for the initial request. The
+special value C<"NONE"> can be used to suppress the Referer header in
+any of subsequent requests. The Referer header will always be suppressed
+in all normal C<http> requests if the referring page was transmitted over
+C<https> as recommended in RFC 2616.
+
+=item --iis
+
+Sends an "Accept: */*" on all URL requests as a workaround for a bug in
+IIS 2.0. If no Accept MIME header is present, IIS 2.0 returns with a
+"406 No acceptable objects were found" error. Also converts any back
+slashes (\\) in URLs to forward slashes (/).
+
+=item --keepext=I<mime/type[,mime/type]>
+
+Keeps the current extension for the list MIME types. Useful when
+downloading text/plain documents that shouldn't all be translated to
+*.txt files.
+
+=item --limit=I<n>
+
+Limit the number of documents to get. The default limit is 50.
+
+=item --nospace
+
+Changes spaces in all URLs to underscore characters (_). Useful when
+downloading files from sites serving URLs with spaces in them. Does not
+remove spaces from fragments, e.g., "file.html#somewhere in here".
+
+=item --prefix=I<url_prefix>
+
+Limit the links to follow. Only URLs that start the prefix string are
+followed.
+
+The default prefix is set as the "directory" of the initial URL to
+follow. For instance if we start lwp-rget with the URL
+C<http://www.sn.no/foo/bar.html>, then prefix will be set to
+C<http://www.sn.no/foo/>.
+
+Use C<--prefix=''> if you don't want the fetching to be limited by any
+prefix.
+
+=item --sleep=I<n>
+
+Sleep I<n> seconds before retrieving each document. This options allows
+you to go slowly, not loading the server you visiting too much.
+
+=item --tolower
+
+Translates all links to lowercase. Useful when downloading files from
+IIS since it does not serve files in a case sensitive manner.
+
+=item --verbose
+
+Make more noise while running.
+
+=item --quiet
+
+Don't make any noise.
+
+=item --version
+
+Print program version number and quit.
+
+=item --help
+
+Print the usage message and quit.
+
+=back
+
+Before the program exits the name of the file, where the initial URL
+is stored, is printed on stdout. All used filenames are also printed
+on stderr as they are loaded. This printing can be suppressed with
+the I<--quiet> option.
+
+=head1 SEE ALSO
+
+L<lwp-request>, L<LWP>
+
+=head1 AUTHOR
+
+Gisle Aas <aas@sn.no>
+
+=cut
+
+use strict;
+
+use Getopt::Long qw(GetOptions);
+use URI::URL qw(url);
+use LWP::MediaTypes qw(media_suffix);
+use HTML::Entities ();
+
+use vars qw($VERSION);
+use vars qw($MAX_DEPTH $MAX_DOCS $PREFIX $REFERER $VERBOSE $QUIET $SLEEP $HIER $AUTH $IIS $TOLOWER $NOSPACE %KEEPEXT);
+
+my $progname = $0;
+$progname =~ s|.*/||; # only basename left
+$progname =~ s/\.\w*$//; #strip extension if any
+
+$VERSION = "5.827";
+
+#$Getopt::Long::debug = 1;
+#$Getopt::Long::ignorecase = 0;
+
+# Defaults
+$MAX_DEPTH = 5;
+$MAX_DOCS = 50;
+
+GetOptions('version' => \&print_version,
+ 'help' => \&usage,
+ 'depth=i' => \$MAX_DEPTH,
+ 'limit=i' => \$MAX_DOCS,
+ 'verbose!' => \$VERBOSE,
+ 'quiet!' => \$QUIET,
+ 'sleep=i' => \$SLEEP,
+ 'prefix:s' => \$PREFIX,
+ 'referer:s'=> \$REFERER,
+ 'hier' => \$HIER,
+ 'auth=s' => \$AUTH,
+ 'iis' => \$IIS,
+ 'tolower' => \$TOLOWER,
+ 'nospace' => \$NOSPACE,
+ 'keepext=s' => \$KEEPEXT{'OPT'},
+ ) || usage();
+
+sub print_version {
+ require LWP;
+ my $DISTNAME = 'libwww-perl-' . LWP::Version();
+ print <<"EOT";
+This is lwp-rget version $VERSION ($DISTNAME)
+
+Copyright 1996-1998, Gisle Aas.
+
+This program is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+EOT
+ exit 0;
+}
+
+my $start_url = shift || usage();
+usage() if @ARGV;
+
+require LWP::UserAgent;
+my $ua = new LWP::UserAgent;
+$ua->agent("$progname/$VERSION ");
+$ua->env_proxy;
+
+unless (defined $PREFIX) {
+ $PREFIX = url($start_url); # limit to URLs below this one
+ eval {
+ $PREFIX->eparams(undef);
+ $PREFIX->equery(undef);
+ };
+
+ $_ = $PREFIX->epath;
+ s|[^/]+$||;
+ $PREFIX->epath($_);
+ $PREFIX = $PREFIX->as_string;
+}
+
+%KEEPEXT = map { lc($_) => 1 } split(/\s*,\s*/, ($KEEPEXT{'OPT'}||0));
+
+my $SUPPRESS_REFERER;
+$SUPPRESS_REFERER++ if ($REFERER || "") eq "NONE";
+
+print <<"" if $VERBOSE;
+START = $start_url
+MAX_DEPTH = $MAX_DEPTH
+MAX_DOCS = $MAX_DOCS
+PREFIX = $PREFIX
+
+my $no_docs = 0;
+my %seen = (); # mapping from URL => local_file
+
+my $filename = fetch($start_url, undef, $REFERER);
+print "$filename\n" unless $QUIET;
+
+sub fetch
+{
+ my($url, $type, $referer, $depth) = @_;
+
+ # Fix http://sitename.com/../blah/blah.html to
+ # http://sitename.com/blah/blah.html
+ $url = $url->as_string if (ref($url));
+ while ($url =~ s#(https?://[^/]+/)\.\.\/#$1#) {}
+
+ # Fix backslashes (\) in URL if $IIS defined
+ $url = fix_backslashes($url) if (defined $IIS);
+
+ $url = url($url);
+ $type ||= 'a';
+ # Might be the background attribute
+ $type = 'img' if ($type eq 'body' || $type eq 'td');
+ $depth ||= 0;
+
+ # Print the URL before we start checking...
+ my $out = (" " x $depth) . $url . " ";
+ $out .= "." x (60 - length($out));
+ print STDERR $out . " " if $VERBOSE;
+
+ # Can't get mailto things
+ if ($url->scheme eq 'mailto') {
+ print STDERR "*skipping mailto*\n" if $VERBOSE;
+ return $url->as_string;
+ }
+
+ # The $plain_url is a URL without the fragment part
+ my $plain_url = $url->clone;
+ $plain_url->frag(undef);
+
+ # Check PREFIX, but not for <IMG ...> links
+ if ($type ne 'img' and $url->as_string !~ /^\Q$PREFIX/o) {
+ print STDERR "*outsider*\n" if $VERBOSE;
+ return $url->as_string;
+ }
+
+ # Translate URL to lowercase if $TOLOWER defined
+ $plain_url = to_lower($plain_url) if (defined $TOLOWER);
+
+ # If we already have it, then there is nothing to be done
+ my $seen = $seen{$plain_url->as_string};
+ if ($seen) {
+ my $frag = $url->frag;
+ $seen .= "#$frag" if defined($frag);
+ $seen = protect_frag_spaces($seen);
+ print STDERR "$seen (again)\n" if $VERBOSE;
+ return $seen;
+ }
+
+ # Too much or too deep
+ if ($depth > $MAX_DEPTH and $type ne 'img') {
+ print STDERR "*too deep*\n" if $VERBOSE;
+ return $url;
+ }
+ if ($no_docs > $MAX_DOCS) {
+ print STDERR "*too many*\n" if $VERBOSE;
+ return $url;
+ }
+
+ # Fetch document
+ $no_docs++;
+ sleep($SLEEP) if $SLEEP;
+ my $req = HTTP::Request->new(GET => $url);
+ # See: http://ftp.sunet.se/pub/NT/mirror-microsoft/kb/Q163/7/74.TXT
+ $req->header ('Accept', '*/*') if (defined $IIS); # GIF/JPG from IIS 2.0
+ $req->authorization_basic(split (/:/, $AUTH)) if (defined $AUTH);
+ if ($referer && !$SUPPRESS_REFERER) {
+ if ($req->uri->scheme eq 'http') {
+ # RFC 2616, section 15.1.3
+ $referer = url($referer) unless ref($referer);
+ undef $referer if ($referer->scheme || '') eq 'https';
+ }
+ $req->referer($referer) if $referer;
+ }
+ my $res = $ua->request($req);
+
+ # Check outcome
+ if ($res->is_success) {
+ my $doc = $res->content;
+ my $ct = $res->content_type;
+ my $name = find_name($res->request->uri, $ct);
+ print STDERR "$name\n" unless $QUIET;
+ $seen{$plain_url->as_string} = $name;
+
+ # If the file is HTML, then we look for internal links
+ if ($ct eq "text/html") {
+ # Save an unprosessed version of the HTML document. This
+ # both reserves the name used, and it also ensures that we
+ # don't loose everything if this program is killed before
+ # we finish.
+ save($name, $doc);
+ my $base = $res->base;
+
+ # Follow and substitute links...
+ $doc =~
+s/
+ (
+ <(img|a|body|area|frame|td)\b # some interesting tag
+ [^>]+ # still inside tag (not strictly correct)
+ \b(?:src|href|background) # some link attribute
+ \s*=\s* # =
+ )
+ (?: # scope of OR-ing
+ (")([^"]*)" | # value in double quotes OR
+ (')([^']*)' | # value in single quotes OR
+ ([^\s>]+) # quoteless value
+ )
+/
+ new_link($1, lc($2), $3||$5, HTML::Entities::decode($4||$6||$7),
+ $base, $name, "$url", $depth+1)
+/giex;
+ # XXX
+ # The regular expression above is not strictly correct.
+ # It is not really possible to parse HTML with a single
+ # regular expression, but it is faster. Tags that might
+ # confuse us include:
+ # <a alt="href" href=link.html>
+ # <a alt=">" href="link.html">
+ #
+ }
+ save($name, $doc);
+ return $name;
+ }
+ else {
+ print STDERR $res->code . " " . $res->message . "\n" if $VERBOSE;
+ $seen{$plain_url->as_string} = $url->as_string;
+ return $url->as_string;
+ }
+}
+
+sub new_link
+{
+ my($pre, $type, $quote, $url, $base, $localbase, $referer, $depth) = @_;
+
+ $url = protect_frag_spaces($url);
+
+ $url = fetch(url($url, $base)->abs, $type, $referer, $depth);
+ $url = url("file:$url", "file:$localbase")->rel
+ unless $url =~ /^[.+\-\w]+:/;
+
+ $url = unprotect_frag_spaces($url);
+
+ return $pre . $quote . $url . $quote;
+}
+
+
+sub protect_frag_spaces
+{
+ my ($url) = @_;
+
+ $url = $url->as_string if (ref($url));
+
+ if ($url =~ m/^([^#]*#)(.+)$/)
+ {
+ my ($base, $frag) = ($1, $2);
+ $frag =~ s/ /%20/g;
+ $url = $base . $frag;
+ }
+
+ return $url;
+}
+
+
+sub unprotect_frag_spaces
+{
+ my ($url) = @_;
+
+ $url = $url->as_string if (ref($url));
+
+ if ($url =~ m/^([^#]*#)(.+)$/)
+ {
+ my ($base, $frag) = ($1, $2);
+ $frag =~ s/%20/ /g;
+ $url = $base . $frag;
+ }
+
+ return $url;
+}
+
+
+sub fix_backslashes
+{
+ my ($url) = @_;
+ my ($base, $frag);
+
+ $url = $url->as_string if (ref($url));
+
+ if ($url =~ m/([^#]+)(#.*)/)
+ {
+ ($base, $frag) = ($1, $2);
+ }
+ else
+ {
+ $base = $url;
+ $frag = "";
+ }
+
+ $base =~ tr/\\/\//;
+ $base =~ s/%5[cC]/\//g; # URL-encoded back slash is %5C
+
+ return $base . $frag;
+}
+
+
+sub to_lower
+{
+ my ($url) = @_;
+ my $was_object = 0;
+
+ if (ref($url))
+ {
+ $url = $url->as_string;
+ $was_object = 1;
+ }
+
+ if ($url =~ m/([^#]+)(#.*)/)
+ {
+ $url = lc($1) . $2;
+ }
+ else
+ {
+ $url = lc($url);
+ }
+
+ if ($was_object == 1)
+ {
+ return url($url);
+ }
+ else
+ {
+ return $url;
+ }
+}
+
+
+sub translate_spaces
+{
+ my ($url) = @_;
+ my ($base, $frag);
+
+ $url = $url->as_string if (ref($url));
+
+ if ($url =~ m/([^#]+)(#.*)/)
+ {
+ ($base, $frag) = ($1, $2);
+ }
+ else
+ {
+ $base = $url;
+ $frag = "";
+ }
+
+ $base =~ s/^ *//; # Remove initial spaces from base
+ $base =~ s/ *$//; # Remove trailing spaces from base
+
+ $base =~ tr/ /_/;
+ $base =~ s/%20/_/g; # URL-encoded space is %20
+
+ return $base . $frag;
+}
+
+
+sub mkdirp
+{
+ my($directory, $mode) = @_;
+ my @dirs = split(/\//, $directory);
+ my $path = shift(@dirs); # build it as we go
+ my $result = 1; # assume it will work
+
+ unless (-d $path) {
+ $result &&= mkdir($path, $mode);
+ }
+
+ foreach (@dirs) {
+ $path .= "/$_";
+ if ( ! -d $path) {
+ $result &&= mkdir($path, $mode);
+ }
+ }
+
+ return $result;
+}
+
+
+sub find_name
+{
+ my($url, $type) = @_;
+ #print "find_name($url, $type)\n";
+
+ # Translate spaces in URL to underscores (_) if $NOSPACE defined
+ $url = translate_spaces($url) if (defined $NOSPACE);
+
+ # Translate URL to lowercase if $TOLOWER defined
+ $url = to_lower($url) if (defined $TOLOWER);
+
+ $url = url($url) unless ref($url);
+
+ my $path = $url->path;
+
+ # trim path until only the basename is left
+ $path =~ s|(.*/)||;
+ my $dirname = ".$1";
+ if (!$HIER) {
+ $dirname = "";
+ }
+ elsif (! -d $dirname) {
+ mkdirp($dirname, 0775);
+ }
+
+ my $extra = ""; # something to make the name unique
+ my $suffix;
+
+ if ($KEEPEXT{lc($type)}) {
+ $suffix = ($path =~ m/\.(.*)/) ? $1 : "";
+ }
+ else {
+ $suffix = media_suffix($type);
+ }
+
+ $path =~ s|\..*||; # trim suffix
+ $path = "index" unless length $path;
+
+ while (1) {
+ # Construct a new file name
+ my $file = $dirname . $path . $extra;
+ $file .= ".$suffix" if $suffix;
+ # Check if it is unique
+ return $file unless -f $file;
+
+ # Try something extra
+ unless ($extra) {
+ $extra = "001";
+ next;
+ }
+ $extra++;
+ }
+}
+
+
+sub save
+{
+ my $name = shift;
+ #print "save($name,...)\n";
+ open(FILE, ">$name") || die "Can't save $name: $!";
+ binmode FILE;
+ print FILE $_[0];
+ close(FILE);
+}
+
+
+sub usage
+{
+ print <<""; exit 1;
+Usage: $progname [options] <URL>
+Allowed options are:
+ --auth=USER:PASS Set authentication credentials for web site
+ --depth=N Maximum depth to traverse (default is $MAX_DEPTH)
+ --hier Download into hierarchy (not all files into cwd)
+ --referer=URI Set initial referer header (or "NONE")
+ --iis Workaround IIS 2.0 bug by sending "Accept: */*" MIME
+ header; translates backslashes (\\) to forward slashes (/)
+ --keepext=type Keep file extension for MIME types (comma-separated list)
+ --limit=N A limit on the number documents to get (default is $MAX_DOCS)
+ --nospace Translate spaces URLs (not #fragments) to underscores (_)
+ --version Print version number and quit
+ --verbose More output
+ --quiet No output
+ --sleep=SECS Sleep between gets, ie. go slowly
+ --prefix=PREFIX Limit URLs to follow to those which begin with PREFIX
+ --tolower Translate all URLs to lowercase (useful with IIS servers)
+
+}
--- /dev/null
+package Bundle::LWP;
+
+$VERSION = "5.835";
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bundle::LWP - install all libwww-perl related modules
+
+=head1 SYNOPSIS
+
+ perl -MCPAN -e 'install Bundle::LWP'
+
+=head1 CONTENTS
+
+MIME::Base64 - Used in authentication headers
+
+Digest::MD5 - Needed to do Digest authentication
+
+URI 1.10 - There are URIs everywhere
+
+Net::FTP 2.58 - If you want ftp://-support
+
+HTML::Tagset - Needed by HTML::Parser
+
+HTML::Parser - Needed by HTML::HeadParser
+
+HTML::HeadParser - To get the correct $res->base
+
+LWP - The reason why you need the modules above
+
+=head1 DESCRIPTION
+
+This bundle defines all prerequisite modules for libwww-perl. Bundles
+have special meaning for the CPAN module. When you install the bundle
+module all modules mentioned in L</CONTENTS> will be installed
+instead.
+
+=head1 SEE ALSO
+
+L<CPAN/Bundles>
--- /dev/null
+package File::Listing;
+
+sub Version { $VERSION; }
+$VERSION = "5.814";
+
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(parse_dir);
+
+use strict;
+
+use Carp ();
+use HTTP::Date qw(str2time);
+
+
+
+sub parse_dir ($;$$$)
+{
+ my($dir, $tz, $fstype, $error) = @_;
+
+ $fstype ||= 'unix';
+ $fstype = "File::Listing::" . lc $fstype;
+
+ my @args = $_[0];
+ push(@args, $tz) if(@_ >= 2);
+ push(@args, $error) if(@_ >= 4);
+
+ $fstype->parse(@args);
+}
+
+
+sub line { Carp::croak("Not implemented yet"); }
+sub init { } # Dummy sub
+
+
+sub file_mode ($)
+{
+ # This routine was originally borrowed from Graham Barr's
+ # Net::FTP package.
+
+ local $_ = shift;
+ my $mode = 0;
+ my($type,$ch);
+
+ s/^(.)// and $type = $1;
+
+ while (/(.)/g) {
+ $mode <<= 1;
+ $mode |= 1 if $1 ne "-" &&
+ $1 ne 'S' &&
+ $1 ne 't' &&
+ $1 ne 'T';
+ }
+
+ $type eq "d" and $mode |= 0040000 or # Directory
+ $type eq "l" and $mode |= 0120000 or # Symbolic Link
+ $mode |= 0100000; # Regular File
+
+ $mode |= 0004000 if /^...s....../i;
+ $mode |= 0002000 if /^......s.../i;
+ $mode |= 0001000 if /^.........t/i;
+
+ $mode;
+}
+
+
+sub parse
+{
+ my($pkg, $dir, $tz, $error) = @_;
+
+ # First let's try to determine what kind of dir parameter we have
+ # received. We allow both listings, reference to arrays and
+ # file handles to read from.
+
+ if (ref($dir) eq 'ARRAY') {
+ # Already splitted up
+ }
+ elsif (ref($dir) eq 'GLOB') {
+ # A file handle
+ }
+ elsif (ref($dir)) {
+ Carp::croak("Illegal argument to parse_dir()");
+ }
+ elsif ($dir =~ /^\*\w+(::\w+)+$/) {
+ # This scalar looks like a file handle, so we assume it is
+ }
+ else {
+ # A normal scalar listing
+ $dir = [ split(/\n/, $dir) ];
+ }
+
+ $pkg->init();
+
+ my @files = ();
+ if (ref($dir) eq 'ARRAY') {
+ for (@$dir) {
+ push(@files, $pkg->line($_, $tz, $error));
+ }
+ }
+ else {
+ local($_);
+ while (<$dir>) {
+ chomp;
+ push(@files, $pkg->line($_, $tz, $error));
+ }
+ }
+ wantarray ? @files : \@files;
+}
+
+
+
+package File::Listing::unix;
+
+use HTTP::Date qw(str2time);
+
+# A place to remember current directory from last line parsed.
+use vars qw($curdir @ISA);
+
+@ISA = qw(File::Listing);
+
+
+
+sub init
+{
+ $curdir = '';
+}
+
+
+sub line
+{
+ shift; # package name
+ local($_) = shift;
+ my($tz, $error) = @_;
+
+ s/\015//g;
+ #study;
+
+ my ($kind, $size, $date, $name);
+ if (($kind, $size, $date, $name) =
+ /^([\-FlrwxsStTdD]{10}) # Type and permission bits
+ .* # Graps
+ \D(\d+) # File size
+ \s+ # Some space
+ (\w{3}\s+\d+\s+(?:\d{1,2}:\d{2}|\d{4})|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}) # Date
+ \s+ # Some more space
+ (.*)$ # File name
+ /x )
+
+ {
+ return if $name eq '.' || $name eq '..';
+ $name = "$curdir/$name" if length $curdir;
+ my $type = '?';
+ if ($kind =~ /^l/ && $name =~ /(.*) -> (.*)/ ) {
+ $name = $1;
+ $type = "l $2";
+ }
+ elsif ($kind =~ /^[\-F]/) { # (hopefully) a regular file
+ $type = 'f';
+ }
+ elsif ($kind =~ /^[dD]/) {
+ $type = 'd';
+ $size = undef; # Don't believe the reported size
+ }
+ return [$name, $type, $size, str2time($date, $tz),
+ File::Listing::file_mode($kind)];
+
+ }
+ elsif (/^(.+):$/ && !/^[dcbsp].*\s.*\s.*:$/ ) {
+ my $dir = $1;
+ return () if $dir eq '.';
+ $curdir = $dir;
+ return ();
+ }
+ elsif (/^[Tt]otal\s+(\d+)$/ || /^\s*$/) {
+ return ();
+ }
+ elsif (/not found/ || # OSF1, HPUX, and SunOS return
+ # "$file not found"
+ /No such file/ || # IRIX returns
+ # "UX:ls: ERROR: Cannot access $file: No such file or directory"
+ # Solaris returns
+ # "$file: No such file or directory"
+ /cannot find/ # Windows NT returns
+ # "The system cannot find the path specified."
+ ) {
+ return () unless defined $error;
+ &$error($_) if ref($error) eq 'CODE';
+ warn "Error: $_\n" if $error eq 'warn';
+ return ();
+ }
+ elsif ($_ eq '') { # AIX, and Linux return nothing
+ return () unless defined $error;
+ &$error("No such file or directory") if ref($error) eq 'CODE';
+ warn "Warning: No such file or directory\n" if $error eq 'warn';
+ return ();
+ }
+ else {
+ # parse failed, check if the dosftp parse understands it
+ File::Listing::dosftp->init();
+ return(File::Listing::dosftp->line($_,$tz,$error));
+ }
+
+}
+
+
+
+package File::Listing::dosftp;
+
+use HTTP::Date qw(str2time);
+
+# A place to remember current directory from last line parsed.
+use vars qw($curdir @ISA);
+
+@ISA = qw(File::Listing);
+
+
+
+sub init
+{
+ $curdir = '';
+}
+
+
+sub line
+{
+ shift; # package name
+ local($_) = shift;
+ my($tz, $error) = @_;
+
+ s/\015//g;
+
+ my ($date, $size_or_dir, $name, $size);
+
+ # 02-05-96 10:48AM 1415 src.slf
+ # 09-10-96 09:18AM <DIR> sl_util
+ if (($date, $size_or_dir, $name) =
+ /^(\d\d-\d\d-\d\d\s+\d\d:\d\d\wM) # Date and time info
+ \s+ # Some space
+ (<\w{3}>|\d+) # Dir or Size
+ \s+ # Some more space
+ (.+)$ # File name
+ /x )
+ {
+ return if $name eq '.' || $name eq '..';
+ $name = "$curdir/$name" if length $curdir;
+ my $type = '?';
+ if ($size_or_dir eq '<DIR>') {
+ $type = "d";
+ $size = ""; # directories have no size in the pc listing
+ }
+ else {
+ $type = 'f';
+ $size = $size_or_dir;
+ }
+ return [$name, $type, $size, str2time($date, $tz), undef];
+ }
+ else {
+ return () unless defined $error;
+ &$error($_) if ref($error) eq 'CODE';
+ warn "Can't parse: $_\n" if $error eq 'warn';
+ return ();
+ }
+
+}
+
+
+
+package File::Listing::vms;
+@File::Listing::vms::ISA = qw(File::Listing);
+
+package File::Listing::netware;
+@File::Listing::netware::ISA = qw(File::Listing);
+
+
+
+package File::Listing::apache;
+
+use vars qw(@ISA);
+
+@ISA = qw(File::Listing);
+
+
+sub init { }
+
+
+sub line {
+ shift; # package name
+ local($_) = shift;
+ my($tz, $error) = @_; # ignored for now...
+
+ if (m!<A\s+HREF=\"([^\"]+)\">.*</A>.*?(\d+)-([a-zA-Z]+)-(\d+)\s+(\d+):(\d+)\s+(?:([\d\.]+[kM]?|-))!i) {
+ my($filename, $filesize) = ($1, $7);
+ my($d,$m,$y, $H,$M) = ($2,$3,$4,$5,$6);
+
+ $filesize = 0 if $filesize eq '-';
+ if ($filesize =~ s/k$//i) {
+ $filesize *= 1024;
+ }
+ elsif ($filesize =~ s/M$//) {
+ $filesize *= 1024*1024;
+ }
+ elsif ($filesize =~ s/G$//) {
+ $filesize *= 1024*1024*1024;
+ }
+ $filesize = int $filesize;
+
+ require Time::Local;
+ my $filetime = Time::Local::timelocal(0,$M,$H,$d,_monthabbrev_number($m)-1,_guess_year($y)-1900);
+ my $filetype = ($filename =~ s|/$|| ? "d" : "f");
+ return [$filename, $filetype, $filesize, $filetime, undef];
+ }
+
+ return ();
+}
+
+
+sub _guess_year {
+ my $y = shift;
+ if ($y >= 90) {
+ $y = 1900+$y;
+ }
+ elsif ($y < 100) {
+ $y = 2000+$y;
+ }
+ $y;
+}
+
+
+sub _monthabbrev_number {
+ my $mon = shift;
+ +{'Jan' => 1,
+ 'Feb' => 2,
+ 'Mar' => 3,
+ 'Apr' => 4,
+ 'May' => 5,
+ 'Jun' => 6,
+ 'Jul' => 7,
+ 'Aug' => 8,
+ 'Sep' => 9,
+ 'Oct' => 10,
+ 'Nov' => 11,
+ 'Dec' => 12,
+ }->{$mon};
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+File::Listing - parse directory listing
+
+=head1 SYNOPSIS
+
+ use File::Listing qw(parse_dir);
+ $ENV{LANG} = "C"; # dates in non-English locales not supported
+ for (parse_dir(`ls -l`)) {
+ ($name, $type, $size, $mtime, $mode) = @$_;
+ next if $type ne 'f'; # plain file
+ #...
+ }
+
+ # directory listing can also be read from a file
+ open(LISTING, "zcat ls-lR.gz|");
+ $dir = parse_dir(\*LISTING, '+0000');
+
+=head1 DESCRIPTION
+
+This module exports a single function called parse_dir(), which can be
+used to parse directory listings.
+
+The first parameter to parse_dir() is the directory listing to parse.
+It can be a scalar, a reference to an array of directory lines or a
+glob representing a filehandle to read the directory listing from.
+
+The second parameter is the time zone to use when parsing time stamps
+in the listing. If this value is undefined, then the local time zone is
+assumed.
+
+The third parameter is the type of listing to assume. Currently
+supported formats are 'unix', 'apache' and 'dosftp'. The default
+value 'unix'. Ideally, the listing type should be determined
+automatically.
+
+The fourth parameter specifies how unparseable lines should be treated.
+Values can be 'ignore', 'warn' or a code reference. Warn means that
+the perl warn() function will be called. If a code reference is
+passed, then this routine will be called and the return value from it
+will be incorporated in the listing. The default is 'ignore'.
+
+Only the first parameter is mandatory.
+
+The return value from parse_dir() is a list of directory entries. In
+a scalar context the return value is a reference to the list. The
+directory entries are represented by an array consisting of [
+$filename, $filetype, $filesize, $filetime, $filemode ]. The
+$filetype value is one of the letters 'f', 'd', 'l' or '?'. The
+$filetime value is the seconds since Jan 1, 1970. The
+$filemode is a bitmask like the mode returned by stat().
+
+=head1 CREDITS
+
+Based on lsparse.pl (from Lee McLoughlin's ftp mirror package) and
+Net::FTP's parse_dir (Graham Barr).
--- /dev/null
+package HTML::Form;
+
+use strict;
+use URI;
+use Carp ();
+
+use vars qw($VERSION $Encode_available);
+$VERSION = "5.829";
+
+eval { require Encode };
+$Encode_available = !$@;
+
+my %form_tags = map {$_ => 1} qw(input textarea button select option);
+
+my %type2class = (
+ text => "TextInput",
+ password => "TextInput",
+ hidden => "TextInput",
+ textarea => "TextInput",
+
+ "reset" => "IgnoreInput",
+
+ radio => "ListInput",
+ checkbox => "ListInput",
+ option => "ListInput",
+
+ button => "SubmitInput",
+ submit => "SubmitInput",
+ image => "ImageInput",
+ file => "FileInput",
+
+ keygen => "KeygenInput",
+);
+
+=head1 NAME
+
+HTML::Form - Class that represents an HTML form element
+
+=head1 SYNOPSIS
+
+ use HTML::Form;
+ $form = HTML::Form->parse($html, $base_uri);
+ $form->value(query => "Perl");
+
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+ $response = $ua->request($form->click);
+
+=head1 DESCRIPTION
+
+Objects of the C<HTML::Form> class represents a single HTML
+C<E<lt>formE<gt> ... E<lt>/formE<gt>> instance. A form consists of a
+sequence of inputs that usually have names, and which can take on
+various values. The state of a form can be tweaked and it can then be
+asked to provide C<HTTP::Request> objects that can be passed to the
+request() method of C<LWP::UserAgent>.
+
+The following methods are available:
+
+=over 4
+
+=item @forms = HTML::Form->parse( $html_document, $base_uri )
+
+=item @forms = HTML::Form->parse( $html_document, base => $base_uri, %opt )
+
+=item @forms = HTML::Form->parse( $response, %opt )
+
+The parse() class method will parse an HTML document and build up
+C<HTML::Form> objects for each <form> element found. If called in scalar
+context only returns the first <form>. Returns an empty list if there
+are no forms to be found.
+
+The required arguments is the HTML document to parse ($html_document) and the
+URI used to retrieve the document ($base_uri). The base URI is needed to resolve
+relative action URIs. The provided HTML document should be a Unicode string
+(or US-ASCII).
+
+By default HTML::Form assumes that the original document was UTF-8 encoded and
+thus encode forms that don't specify an explict I<accept-charset> as UTF-8.
+The charset assumed can be overridden by providing the C<charset> option to
+parse(). It's a good idea to be explict about this parameter as well, thus
+the recommended simplest invocation becomes:
+
+ my @forms = HTML::Form->parse(
+ Encode::decode($encoding, $html_document_bytes),
+ base => $base_uri,
+ charset => $encoding,
+ );
+
+If the document was retrieved with LWP then the response object provide methods
+to obtain a proper value for C<base> and C<charset>:
+
+ my $ua = LWP::UserAgent->new;
+ my $response = $ua->get("http://www.example.com/form.html");
+ my @forms = HTML::Form->parse($response->decoded_content,
+ base => $response->base,
+ charset => $response->content_charset,
+ );
+
+In fact, the parse() method can parse from an C<HTTP::Response> object
+directly, so the example above can be more conveniently written as:
+
+ my $ua = LWP::UserAgent->new;
+ my $response = $ua->get("http://www.example.com/form.html");
+ my @forms = HTML::Form->parse($response);
+
+Note that any object that implements a decoded_content(), base() and
+content_charset() method with similar behaviour as C<HTTP::Response> will do.
+
+Additional options might be passed in to control how the parse method
+behaves. The following are all the options currently recognized:
+
+=over
+
+=item C<< base => $uri >>
+
+This is the URI used to retrive the original document. This option is not optional ;-)
+
+=item C<< charset => $str >>
+
+Specify what charset the original document was encoded in. This is used as
+the default for accept_charset. If not provided this defaults to "UTF-8".
+
+=item C<< verbose => $bool >>
+
+Warn (print messages to STDERR) about any bad HTML form constructs found.
+You can trap these with $SIG{__WARN__}.
+
+=item C<< strict => $bool >>
+
+Initialize any form objects with the given strict attribute.
+
+=back
+
+=cut
+
+sub parse
+{
+ my $class = shift;
+ my $html = shift;
+ unshift(@_, "base") if @_ == 1;
+ my %opt = @_;
+
+ require HTML::TokeParser;
+ my $p = HTML::TokeParser->new(ref($html) ? $html->decoded_content(ref => 1) : \$html);
+ die "Failed to create HTML::TokeParser object" unless $p;
+
+ my $base_uri = delete $opt{base};
+ my $charset = delete $opt{charset};
+ my $strict = delete $opt{strict};
+ my $verbose = delete $opt{verbose};
+
+ if ($^W) {
+ Carp::carp("Unrecognized option $_ in HTML::Form->parse") for sort keys %opt;
+ }
+
+ unless (defined $base_uri) {
+ if (ref($html)) {
+ $base_uri = $html->base;
+ }
+ else {
+ Carp::croak("HTML::Form::parse: No \$base_uri provided");
+ }
+ }
+ unless (defined $charset) {
+ if (ref($html) and $html->can("content_charset")) {
+ $charset = $html->content_charset;
+ }
+ unless ($charset) {
+ $charset = "UTF-8";
+ }
+ }
+
+ my @forms;
+ my $f; # current form
+
+ my %openselect; # index to the open instance of a select
+
+ while (my $t = $p->get_tag) {
+ my($tag,$attr) = @$t;
+ if ($tag eq "form") {
+ my $action = delete $attr->{'action'};
+ $action = "" unless defined $action;
+ $action = URI->new_abs($action, $base_uri);
+ $f = $class->new($attr->{'method'},
+ $action,
+ $attr->{'enctype'});
+ $f->accept_charset($attr->{'accept-charset'}) if $attr->{'accept-charset'};
+ $f->{default_charset} = $charset;
+ $f->{attr} = $attr;
+ $f->strict(1) if $strict;
+ %openselect = ();
+ push(@forms, $f);
+ my(%labels, $current_label);
+ while (my $t = $p->get_tag) {
+ my($tag, $attr) = @$t;
+ last if $tag eq "/form";
+
+ # if we are inside a label tag, then keep
+ # appending any text to the current label
+ if(defined $current_label) {
+ $current_label = join " ",
+ grep { defined and length }
+ $current_label,
+ $p->get_phrase;
+ }
+
+ if ($tag eq "input") {
+ $attr->{value_name} =
+ exists $attr->{id} && exists $labels{$attr->{id}} ? $labels{$attr->{id}} :
+ defined $current_label ? $current_label :
+ $p->get_phrase;
+ }
+
+ if ($tag eq "label") {
+ $current_label = $p->get_phrase;
+ $labels{ $attr->{for} } = $current_label
+ if exists $attr->{for};
+ }
+ elsif ($tag eq "/label") {
+ $current_label = undef;
+ }
+ elsif ($tag eq "input") {
+ my $type = delete $attr->{type} || "text";
+ $f->push_input($type, $attr, $verbose);
+ }
+ elsif ($tag eq "button") {
+ my $type = delete $attr->{type} || "submit";
+ $f->push_input($type, $attr, $verbose);
+ }
+ elsif ($tag eq "textarea") {
+ $attr->{textarea_value} = $attr->{value}
+ if exists $attr->{value};
+ my $text = $p->get_text("/textarea");
+ $attr->{value} = $text;
+ $f->push_input("textarea", $attr, $verbose);
+ }
+ elsif ($tag eq "select") {
+ # rename attributes reserved to come for the option tag
+ for ("value", "value_name") {
+ $attr->{"select_$_"} = delete $attr->{$_}
+ if exists $attr->{$_};
+ }
+ # count this new select option separately
+ my $name = $attr->{name};
+ $name = "" unless defined $name;
+ $openselect{$name}++;
+
+ while ($t = $p->get_tag) {
+ my $tag = shift @$t;
+ last if $tag eq "/select";
+ next if $tag =~ m,/?optgroup,;
+ next if $tag eq "/option";
+ if ($tag eq "option") {
+ my %a = %{$t->[0]};
+ # rename keys so they don't clash with %attr
+ for (keys %a) {
+ next if $_ eq "value";
+ $a{"option_$_"} = delete $a{$_};
+ }
+ while (my($k,$v) = each %$attr) {
+ $a{$k} = $v;
+ }
+ $a{value_name} = $p->get_trimmed_text;
+ $a{value} = delete $a{value_name}
+ unless defined $a{value};
+ $a{idx} = $openselect{$name};
+ $f->push_input("option", \%a, $verbose);
+ }
+ else {
+ warn("Bad <select> tag '$tag' in $base_uri\n") if $verbose;
+ if ($tag eq "/form" ||
+ $tag eq "input" ||
+ $tag eq "textarea" ||
+ $tag eq "select" ||
+ $tag eq "keygen")
+ {
+ # MSIE implictly terminate the <select> here, so we
+ # try to do the same. Actually the MSIE behaviour
+ # appears really strange: <input> and <textarea>
+ # do implictly close, but not <select>, <keygen> or
+ # </form>.
+ my $type = ($tag =~ s,^/,,) ? "E" : "S";
+ $p->unget_token([$type, $tag, @$t]);
+ last;
+ }
+ }
+ }
+ }
+ elsif ($tag eq "keygen") {
+ $f->push_input("keygen", $attr, $verbose);
+ }
+ }
+ }
+ elsif ($form_tags{$tag}) {
+ warn("<$tag> outside <form> in $base_uri\n") if $verbose;
+ }
+ }
+ for (@forms) {
+ $_->fixup;
+ }
+
+ wantarray ? @forms : $forms[0];
+}
+
+sub new {
+ my $class = shift;
+ my $self = bless {}, $class;
+ $self->{method} = uc(shift || "GET");
+ $self->{action} = shift || Carp::croak("No action defined");
+ $self->{enctype} = lc(shift || "application/x-www-form-urlencoded");
+ $self->{accept_charset} = "UNKNOWN";
+ $self->{default_charset} = "UTF-8";
+ $self->{inputs} = [@_];
+ $self;
+}
+
+
+sub push_input
+{
+ my($self, $type, $attr, $verbose) = @_;
+ $type = lc $type;
+ my $class = $type2class{$type};
+ unless ($class) {
+ Carp::carp("Unknown input type '$type'") if $verbose;
+ $class = "TextInput";
+ }
+ $class = "HTML::Form::$class";
+ my @extra;
+ push(@extra, readonly => 1) if $type eq "hidden";
+ push(@extra, strict => 1) if $self->{strict};
+ if ($type eq "file" && exists $attr->{value}) {
+ # it's not safe to trust the value set by the server
+ # the user always need to explictly set the names of files to upload
+ $attr->{orig_value} = delete $attr->{value};
+ }
+ delete $attr->{type}; # don't confuse the type argument
+ my $input = $class->new(type => $type, %$attr, @extra);
+ $input->add_to_form($self);
+}
+
+
+=item $method = $form->method
+
+=item $form->method( $new_method )
+
+This method is gets/sets the I<method> name used for the
+C<HTTP::Request> generated. It is a string like "GET" or "POST".
+
+=item $action = $form->action
+
+=item $form->action( $new_action )
+
+This method gets/sets the URI which we want to apply the request
+I<method> to.
+
+=item $enctype = $form->enctype
+
+=item $form->enctype( $new_enctype )
+
+This method gets/sets the encoding type for the form data. It is a
+string like "application/x-www-form-urlencoded" or "multipart/form-data".
+
+=item $accept = $form->accept_charset
+
+=item $form->accept_charset( $new_accept )
+
+This method gets/sets the list of charset encodings that the server processing
+the form accepts. Current implementation supports only one-element lists.
+Default value is "UNKNOWN" which we interpret as a request to use document
+charset as specified by the 'charset' parameter of the parse() method. To
+encode character strings you should have modern perl with Encode module. On
+older perls the setting of this attribute has no effect.
+
+=cut
+
+BEGIN {
+ # Set up some accesor
+ for (qw(method action enctype accept_charset)) {
+ my $m = $_;
+ no strict 'refs';
+ *{$m} = sub {
+ my $self = shift;
+ my $old = $self->{$m};
+ $self->{$m} = shift if @_;
+ $old;
+ };
+ }
+ *uri = \&action; # alias
+}
+
+=item $value = $form->attr( $name )
+
+=item $form->attr( $name, $new_value )
+
+This method give access to the original HTML attributes of the <form> tag.
+The $name should always be passed in lower case.
+
+Example:
+
+ @f = HTML::Form->parse( $html, $foo );
+ @f = grep $_->attr("id") eq "foo", @f;
+ die "No form named 'foo' found" unless @f;
+ $foo = shift @f;
+
+=cut
+
+sub attr {
+ my $self = shift;
+ my $name = shift;
+ return undef unless defined $name;
+
+ my $old = $self->{attr}{$name};
+ $self->{attr}{$name} = shift if @_;
+ return $old;
+}
+
+=item $bool = $form->strict
+
+=item $form->strict( $bool )
+
+Gets/sets the strict attribute of a form. If the strict is turned on
+the methods that change values of the form will croak if you try to
+set illegal values or modify readonly fields. The default is not to be strict.
+
+=cut
+
+sub strict {
+ my $self = shift;
+ my $old = $self->{strict};
+ if (@_) {
+ $self->{strict} = shift;
+ for my $input (@{$self->{inputs}}) {
+ $input->strict($self->{strict});
+ }
+ }
+ return $old;
+}
+
+
+=item @inputs = $form->inputs
+
+This method returns the list of inputs in the form. If called in
+scalar context it returns the number of inputs contained in the form.
+See L</INPUTS> for what methods are available for the input objects
+returned.
+
+=cut
+
+sub inputs
+{
+ my $self = shift;
+ @{$self->{'inputs'}};
+}
+
+
+=item $input = $form->find_input( $selector )
+
+=item $input = $form->find_input( $selector, $type )
+
+=item $input = $form->find_input( $selector, $type, $index )
+
+This method is used to locate specific inputs within the form. All
+inputs that match the arguments given are returned. In scalar context
+only the first is returned, or C<undef> if none match.
+
+If $selector is specified, then the input's name, id, class attribute must
+match. A selector prefixed with '#' must match the id attribute of the input.
+A selector prefixed with '.' matches the class attribute. A selector prefixed
+with '^' or with no prefix matches the name attribute.
+
+If $type is specified, then the input must have the specified type.
+The following type names are used: "text", "password", "hidden",
+"textarea", "file", "image", "submit", "radio", "checkbox" and "option".
+
+The $index is the sequence number of the input matched where 1 is the
+first. If combined with $name and/or $type then it select the I<n>th
+input with the given name and/or type.
+
+=cut
+
+sub find_input
+{
+ my($self, $name, $type, $no) = @_;
+ if (wantarray) {
+ my @res;
+ my $c;
+ for (@{$self->{'inputs'}}) {
+ next if defined($name) && !$_->selected($name);
+ next if $type && $type ne $_->{type};
+ $c++;
+ next if $no && $no != $c;
+ push(@res, $_);
+ }
+ return @res;
+
+ }
+ else {
+ $no ||= 1;
+ for (@{$self->{'inputs'}}) {
+ next if defined($name) && !$_->selected($name);
+ next if $type && $type ne $_->{type};
+ next if --$no;
+ return $_;
+ }
+ return undef;
+ }
+}
+
+sub fixup
+{
+ my $self = shift;
+ for (@{$self->{'inputs'}}) {
+ $_->fixup;
+ }
+}
+
+
+=item $value = $form->value( $selector )
+
+=item $form->value( $selector, $new_value )
+
+The value() method can be used to get/set the value of some input. If
+strict is enabled and no input has the indicated name, then this method will croak.
+
+If multiple inputs have the same name, only the first one will be
+affected.
+
+The call:
+
+ $form->value('foo')
+
+is basically a short-hand for:
+
+ $form->find_input('foo')->value;
+
+=cut
+
+sub value
+{
+ my $self = shift;
+ my $key = shift;
+ my $input = $self->find_input($key);
+ unless ($input) {
+ Carp::croak("No such field '$key'") if $self->{strict};
+ return undef unless @_;
+ $input = $self->push_input("text", { name => $key, value => "" });
+ }
+ local $Carp::CarpLevel = 1;
+ $input->value(@_);
+}
+
+=item @names = $form->param
+
+=item @values = $form->param( $name )
+
+=item $form->param( $name, $value, ... )
+
+=item $form->param( $name, \@values )
+
+Alternative interface to examining and setting the values of the form.
+
+If called without arguments then it returns the names of all the
+inputs in the form. The names will not repeat even if multiple inputs
+have the same name. In scalar context the number of different names
+is returned.
+
+If called with a single argument then it returns the value or values
+of inputs with the given name. If called in scalar context only the
+first value is returned. If no input exists with the given name, then
+C<undef> is returned.
+
+If called with 2 or more arguments then it will set values of the
+named inputs. This form will croak if no inputs have the given name
+or if any of the values provided does not fit. Values can also be
+provided as a reference to an array. This form will allow unsetting
+all values with the given name as well.
+
+This interface resembles that of the param() function of the CGI
+module.
+
+=cut
+
+sub param {
+ my $self = shift;
+ if (@_) {
+ my $name = shift;
+ my @inputs;
+ for ($self->inputs) {
+ my $n = $_->name;
+ next if !defined($n) || $n ne $name;
+ push(@inputs, $_);
+ }
+
+ if (@_) {
+ # set
+ die "No '$name' parameter exists" unless @inputs;
+ my @v = @_;
+ @v = @{$v[0]} if @v == 1 && ref($v[0]);
+ while (@v) {
+ my $v = shift @v;
+ my $err;
+ for my $i (0 .. @inputs-1) {
+ eval {
+ $inputs[$i]->value($v);
+ };
+ unless ($@) {
+ undef($err);
+ splice(@inputs, $i, 1);
+ last;
+ }
+ $err ||= $@;
+ }
+ die $err if $err;
+ }
+
+ # the rest of the input should be cleared
+ for (@inputs) {
+ $_->value(undef);
+ }
+ }
+ else {
+ # get
+ my @v;
+ for (@inputs) {
+ if (defined(my $v = $_->value)) {
+ push(@v, $v);
+ }
+ }
+ return wantarray ? @v : $v[0];
+ }
+ }
+ else {
+ # list parameter names
+ my @n;
+ my %seen;
+ for ($self->inputs) {
+ my $n = $_->name;
+ next if !defined($n) || $seen{$n}++;
+ push(@n, $n);
+ }
+ return @n;
+ }
+}
+
+
+=item $form->try_others( \&callback )
+
+This method will iterate over all permutations of unvisited enumerated
+values (<select>, <radio>, <checkbox>) and invoke the callback for
+each. The callback is passed the $form as argument. The return value
+from the callback is ignored and the try_others() method itself does
+not return anything.
+
+=cut
+
+sub try_others
+{
+ my($self, $cb) = @_;
+ my @try;
+ for (@{$self->{'inputs'}}) {
+ my @not_tried_yet = $_->other_possible_values;
+ next unless @not_tried_yet;
+ push(@try, [\@not_tried_yet, $_]);
+ }
+ return unless @try;
+ $self->_try($cb, \@try, 0);
+}
+
+sub _try
+{
+ my($self, $cb, $try, $i) = @_;
+ for (@{$try->[$i][0]}) {
+ $try->[$i][1]->value($_);
+ &$cb($self);
+ $self->_try($cb, $try, $i+1) if $i+1 < @$try;
+ }
+}
+
+
+=item $request = $form->make_request
+
+Will return an C<HTTP::Request> object that reflects the current setting
+of the form. You might want to use the click() method instead.
+
+=cut
+
+sub make_request
+{
+ my $self = shift;
+ my $method = uc $self->{'method'};
+ my $uri = $self->{'action'};
+ my $enctype = $self->{'enctype'};
+ my @form = $self->form;
+
+ my $charset = $self->accept_charset eq "UNKNOWN" ? $self->{default_charset} : $self->accept_charset;
+ if ($Encode_available) {
+ foreach my $fi (@form) {
+ $fi = Encode::encode($charset, $fi) unless ref($fi);
+ }
+ }
+
+ if ($method eq "GET") {
+ require HTTP::Request;
+ $uri = URI->new($uri, "http");
+ $uri->query_form(@form);
+ return HTTP::Request->new(GET => $uri);
+ }
+ elsif ($method eq "POST") {
+ require HTTP::Request::Common;
+ return HTTP::Request::Common::POST($uri, \@form,
+ Content_Type => $enctype);
+ }
+ else {
+ Carp::croak("Unknown method '$method'");
+ }
+}
+
+
+=item $request = $form->click
+
+=item $request = $form->click( $selector )
+
+=item $request = $form->click( $x, $y )
+
+=item $request = $form->click( $selector, $x, $y )
+
+Will "click" on the first clickable input (which will be of type
+C<submit> or C<image>). The result of clicking is an C<HTTP::Request>
+object that can then be passed to C<LWP::UserAgent> if you want to
+obtain the server response.
+
+If a $selector is specified, we will click on the first clickable input
+matching the selector, and the method will croak if no matching clickable
+input is found. If $selector is I<not> specified, then it
+is ok if the form contains no clickable inputs. In this case the
+click() method returns the same request as the make_request() method
+would do. See description of the find_input() method above for how
+the $selector is specified.
+
+If there are multiple clickable inputs with the same name, then there
+is no way to get the click() method of the C<HTML::Form> to click on
+any but the first. If you need this you would have to locate the
+input with find_input() and invoke the click() method on the given
+input yourself.
+
+A click coordinate pair can also be provided, but this only makes a
+difference if you clicked on an image. The default coordinate is
+(1,1). The upper-left corner of the image is (0,0), but some badly
+coded CGI scripts are known to not recognize this. Therefore (1,1) was
+selected as a safer default.
+
+=cut
+
+sub click
+{
+ my $self = shift;
+ my $name;
+ $name = shift if (@_ % 2) == 1; # odd number of arguments
+
+ # try to find first submit button to activate
+ for (@{$self->{'inputs'}}) {
+ next unless $_->can("click");
+ next if $name && !$_->selected($name);
+ next if $_->disabled;
+ return $_->click($self, @_);
+ }
+ Carp::croak("No clickable input with name $name") if $name;
+ $self->make_request;
+}
+
+
+=item @kw = $form->form
+
+Returns the current setting as a sequence of key/value pairs. Note
+that keys might be repeated, which means that some values might be
+lost if the return values are assigned to a hash.
+
+In scalar context this method returns the number of key/value pairs
+generated.
+
+=cut
+
+sub form
+{
+ my $self = shift;
+ map { $_->form_name_value($self) } @{$self->{'inputs'}};
+}
+
+
+=item $form->dump
+
+Returns a textual representation of current state of the form. Mainly
+useful for debugging. If called in void context, then the dump is
+printed on STDERR.
+
+=cut
+
+sub dump
+{
+ my $self = shift;
+ my $method = $self->{'method'};
+ my $uri = $self->{'action'};
+ my $enctype = $self->{'enctype'};
+ my $dump = "$method $uri";
+ $dump .= " ($enctype)"
+ if $enctype ne "application/x-www-form-urlencoded";
+ $dump .= " [$self->{attr}{name}]"
+ if exists $self->{attr}{name};
+ $dump .= "\n";
+ for ($self->inputs) {
+ $dump .= " " . $_->dump . "\n";
+ }
+ print STDERR $dump unless defined wantarray;
+ $dump;
+}
+
+
+#---------------------------------------------------
+package HTML::Form::Input;
+
+=back
+
+=head1 INPUTS
+
+An C<HTML::Form> objects contains a sequence of I<inputs>. References to
+the inputs can be obtained with the $form->inputs or $form->find_input
+methods.
+
+Note that there is I<not> a one-to-one correspondence between input
+I<objects> and E<lt>inputE<gt> I<elements> in the HTML document. An
+input object basically represents a name/value pair, so when multiple
+HTML elements contribute to the same name/value pair in the submitted
+form they are combined.
+
+The input elements that are mapped one-to-one are "text", "textarea",
+"password", "hidden", "file", "image", "submit" and "checkbox". For
+the "radio" and "option" inputs the story is not as simple: All
+E<lt>input type="radio"E<gt> elements with the same name will
+contribute to the same input radio object. The number of radio input
+objects will be the same as the number of distinct names used for the
+E<lt>input type="radio"E<gt> elements. For a E<lt>selectE<gt> element
+without the C<multiple> attribute there will be one input object of
+type of "option". For a E<lt>select multipleE<gt> element there will
+be one input object for each contained E<lt>optionE<gt> element. Each
+one of these option objects will have the same name.
+
+The following methods are available for the I<input> objects:
+
+=over 4
+
+=cut
+
+sub new
+{
+ my $class = shift;
+ my $self = bless {@_}, $class;
+ $self;
+}
+
+sub add_to_form
+{
+ my($self, $form) = @_;
+ push(@{$form->{'inputs'}}, $self);
+ $self;
+}
+
+sub strict {
+ my $self = shift;
+ my $old = $self->{strict};
+ if (@_) {
+ $self->{strict} = shift;
+ }
+ $old;
+}
+
+sub fixup {}
+
+
+=item $input->type
+
+Returns the type of this input. The type is one of the following
+strings: "text", "password", "hidden", "textarea", "file", "image", "submit",
+"radio", "checkbox" or "option".
+
+=cut
+
+sub type
+{
+ shift->{type};
+}
+
+=item $name = $input->name
+
+=item $input->name( $new_name )
+
+This method can be used to get/set the current name of the input.
+
+=item $input->id
+
+=item $input->class
+
+These methods can be used to get/set the current id or class attribute for the input.
+
+=item $input->selected( $selector )
+
+Returns TRUE if the given selector matched the input. See the description of
+the find_input() method above for a description of the selector syntax.
+
+=item $value = $input->value
+
+=item $input->value( $new_value )
+
+This method can be used to get/set the current value of an
+input.
+
+If strict is enabled and the input only can take an enumerated list of values,
+then it is an error to try to set it to something else and the method will
+croak if you try.
+
+You will also be able to set the value of read-only inputs, but a
+warning will be generated if running under C<perl -w>.
+
+=cut
+
+sub name
+{
+ my $self = shift;
+ my $old = $self->{name};
+ $self->{name} = shift if @_;
+ $old;
+}
+
+sub id
+{
+ my $self = shift;
+ my $old = $self->{id};
+ $self->{id} = shift if @_;
+ $old;
+}
+
+sub class
+{
+ my $self = shift;
+ my $old = $self->{class};
+ $self->{class} = shift if @_;
+ $old;
+}
+
+sub selected {
+ my($self, $sel) = @_;
+ return undef unless defined $sel;
+ my $attr =
+ $sel =~ s/^\^// ? "name" :
+ $sel =~ s/^#// ? "id" :
+ $sel =~ s/^\.// ? "class" :
+ "name";
+ return 0 unless defined $self->{$attr};
+ return $self->{$attr} eq $sel;
+}
+
+sub value
+{
+ my $self = shift;
+ my $old = $self->{value};
+ $self->{value} = shift if @_;
+ $old;
+}
+
+=item $input->possible_values
+
+Returns a list of all values that an input can take. For inputs that
+do not have discrete values, this returns an empty list.
+
+=cut
+
+sub possible_values
+{
+ return;
+}
+
+=item $input->other_possible_values
+
+Returns a list of all values not tried yet.
+
+=cut
+
+sub other_possible_values
+{
+ return;
+}
+
+=item $input->value_names
+
+For some inputs the values can have names that are different from the
+values themselves. The number of names returned by this method will
+match the number of values reported by $input->possible_values.
+
+When setting values using the value() method it is also possible to
+use the value names in place of the value itself.
+
+=cut
+
+sub value_names {
+ return
+}
+
+=item $bool = $input->readonly
+
+=item $input->readonly( $bool )
+
+This method is used to get/set the value of the readonly attribute.
+You are allowed to modify the value of readonly inputs, but setting
+the value will generate some noise when warnings are enabled. Hidden
+fields always start out readonly.
+
+=cut
+
+sub readonly {
+ my $self = shift;
+ my $old = $self->{readonly};
+ $self->{readonly} = shift if @_;
+ $old;
+}
+
+=item $bool = $input->disabled
+
+=item $input->disabled( $bool )
+
+This method is used to get/set the value of the disabled attribute.
+Disabled inputs do not contribute any key/value pairs for the form
+value.
+
+=cut
+
+sub disabled {
+ my $self = shift;
+ my $old = $self->{disabled};
+ $self->{disabled} = shift if @_;
+ $old;
+}
+
+=item $input->form_name_value
+
+Returns a (possible empty) list of key/value pairs that should be
+incorporated in the form value from this input.
+
+=cut
+
+sub form_name_value
+{
+ my $self = shift;
+ my $name = $self->{'name'};
+ return unless defined $name;
+ return if $self->disabled;
+ my $value = $self->value;
+ return unless defined $value;
+ return ($name => $value);
+}
+
+sub dump
+{
+ my $self = shift;
+ my $name = $self->name;
+ $name = "<NONAME>" unless defined $name;
+ my $value = $self->value;
+ $value = "<UNDEF>" unless defined $value;
+ my $dump = "$name=$value";
+
+ my $type = $self->type;
+
+ $type .= " disabled" if $self->disabled;
+ $type .= " readonly" if $self->readonly;
+ return sprintf "%-30s %s", $dump, "($type)" unless $self->{menu};
+
+ my @menu;
+ my $i = 0;
+ for (@{$self->{menu}}) {
+ my $opt = $_->{value};
+ $opt = "<UNDEF>" unless defined $opt;
+ $opt .= "/$_->{name}"
+ if defined $_->{name} && length $_->{name} && $_->{name} ne $opt;
+ substr($opt,0,0) = "-" if $_->{disabled};
+ if (exists $self->{current} && $self->{current} == $i) {
+ substr($opt,0,0) = "!" unless $_->{seen};
+ substr($opt,0,0) = "*";
+ }
+ else {
+ substr($opt,0,0) = ":" if $_->{seen};
+ }
+ push(@menu, $opt);
+ $i++;
+ }
+
+ return sprintf "%-30s %-10s %s", $dump, "($type)", "[" . join("|", @menu) . "]";
+}
+
+
+#---------------------------------------------------
+package HTML::Form::TextInput;
+@HTML::Form::TextInput::ISA=qw(HTML::Form::Input);
+
+#input/text
+#input/password
+#input/hidden
+#textarea
+
+sub value
+{
+ my $self = shift;
+ my $old = $self->{value};
+ $old = "" unless defined $old;
+ if (@_) {
+ Carp::croak("Input '$self->{name}' is readonly")
+ if $self->{strict} && $self->{readonly};
+ my $new = shift;
+ my $n = exists $self->{maxlength} ? $self->{maxlength} : undef;
+ Carp::croak("Input '$self->{name}' has maxlength '$n'")
+ if $self->{strict} && defined($n) && defined($new) && length($new) > $n;
+ $self->{value} = $new;
+ }
+ $old;
+}
+
+#---------------------------------------------------
+package HTML::Form::IgnoreInput;
+@HTML::Form::IgnoreInput::ISA=qw(HTML::Form::Input);
+
+#input/button
+#input/reset
+
+sub value { return }
+
+
+#---------------------------------------------------
+package HTML::Form::ListInput;
+@HTML::Form::ListInput::ISA=qw(HTML::Form::Input);
+
+#select/option (val1, val2, ....)
+#input/radio (undef, val1, val2,...)
+#input/checkbox (undef, value)
+#select-multiple/option (undef, value)
+
+sub new
+{
+ my $class = shift;
+ my $self = $class->SUPER::new(@_);
+
+ my $value = delete $self->{value};
+ my $value_name = delete $self->{value_name};
+ my $type = $self->{type};
+
+ if ($type eq "checkbox") {
+ $value = "on" unless defined $value;
+ $self->{menu} = [
+ { value => undef, name => "off", },
+ { value => $value, name => $value_name, },
+ ];
+ $self->{current} = (delete $self->{checked}) ? 1 : 0;
+ ;
+ }
+ else {
+ $self->{option_disabled}++
+ if $type eq "radio" && delete $self->{disabled};
+ $self->{menu} = [
+ {value => $value, name => $value_name},
+ ];
+ my $checked = $self->{checked} || $self->{option_selected};
+ delete $self->{checked};
+ delete $self->{option_selected};
+ if (exists $self->{multiple}) {
+ unshift(@{$self->{menu}}, { value => undef, name => "off"});
+ $self->{current} = $checked ? 1 : 0;
+ }
+ else {
+ $self->{current} = 0 if $checked;
+ }
+ }
+ $self;
+}
+
+sub add_to_form
+{
+ my($self, $form) = @_;
+ my $type = $self->type;
+
+ return $self->SUPER::add_to_form($form)
+ if $type eq "checkbox";
+
+ if ($type eq "option" && exists $self->{multiple}) {
+ $self->{disabled} ||= delete $self->{option_disabled};
+ return $self->SUPER::add_to_form($form);
+ }
+
+ die "Assert" if @{$self->{menu}} != 1;
+ my $m = $self->{menu}[0];
+ $m->{disabled}++ if delete $self->{option_disabled};
+
+ my $prev = $form->find_input($self->{name}, $self->{type}, $self->{idx});
+ return $self->SUPER::add_to_form($form) unless $prev;
+
+ # merge menues
+ $prev->{current} = @{$prev->{menu}} if exists $self->{current};
+ push(@{$prev->{menu}}, $m);
+}
+
+sub fixup
+{
+ my $self = shift;
+ if ($self->{type} eq "option" && !(exists $self->{current})) {
+ $self->{current} = 0;
+ }
+ $self->{menu}[$self->{current}]{seen}++ if exists $self->{current};
+}
+
+sub disabled
+{
+ my $self = shift;
+ my $type = $self->type;
+
+ my $old = $self->{disabled} || _menu_all_disabled(@{$self->{menu}});
+ if (@_) {
+ my $v = shift;
+ $self->{disabled} = $v;
+ for (@{$self->{menu}}) {
+ $_->{disabled} = $v;
+ }
+ }
+ return $old;
+}
+
+sub _menu_all_disabled {
+ for (@_) {
+ return 0 unless $_->{disabled};
+ }
+ return 1;
+}
+
+sub value
+{
+ my $self = shift;
+ my $old;
+ $old = $self->{menu}[$self->{current}]{value} if exists $self->{current};
+ $old = $self->{value} if exists $self->{value};
+ if (@_) {
+ my $i = 0;
+ my $val = shift;
+ my $cur;
+ my $disabled;
+ for (@{$self->{menu}}) {
+ if ((defined($val) && defined($_->{value}) && $val eq $_->{value}) ||
+ (!defined($val) && !defined($_->{value}))
+ )
+ {
+ $cur = $i;
+ $disabled = $_->{disabled};
+ last unless $disabled;
+ }
+ $i++;
+ }
+ if (!(defined $cur) || $disabled) {
+ if (defined $val) {
+ # try to search among the alternative names as well
+ my $i = 0;
+ my $cur_ignorecase;
+ my $lc_val = lc($val);
+ for (@{$self->{menu}}) {
+ if (defined $_->{name}) {
+ if ($val eq $_->{name}) {
+ $disabled = $_->{disabled};
+ $cur = $i;
+ last unless $disabled;
+ }
+ if (!defined($cur_ignorecase) && $lc_val eq lc($_->{name})) {
+ $cur_ignorecase = $i;
+ }
+ }
+ $i++;
+ }
+ unless (defined $cur) {
+ $cur = $cur_ignorecase;
+ if (defined $cur) {
+ $disabled = $self->{menu}[$cur]{disabled};
+ }
+ elsif ($self->{strict}) {
+ my $n = $self->name;
+ Carp::croak("Illegal value '$val' for field '$n'");
+ }
+ }
+ }
+ elsif ($self->{strict}) {
+ my $n = $self->name;
+ Carp::croak("The '$n' field can't be unchecked");
+ }
+ }
+ if ($self->{strict} && $disabled) {
+ my $n = $self->name;
+ Carp::croak("The value '$val' has been disabled for field '$n'");
+ }
+ if (defined $cur) {
+ $self->{current} = $cur;
+ $self->{menu}[$cur]{seen}++;
+ delete $self->{value};
+ }
+ else {
+ $self->{value} = $val;
+ delete $self->{current};
+ }
+ }
+ $old;
+}
+
+=item $input->check
+
+Some input types represent toggles that can be turned on/off. This
+includes "checkbox" and "option" inputs. Calling this method turns
+this input on without having to know the value name. If the input is
+already on, then nothing happens.
+
+This has the same effect as:
+
+ $input->value($input->possible_values[1]);
+
+The input can be turned off with:
+
+ $input->value(undef);
+
+=cut
+
+sub check
+{
+ my $self = shift;
+ $self->{current} = 1;
+ $self->{menu}[1]{seen}++;
+}
+
+sub possible_values
+{
+ my $self = shift;
+ map $_->{value}, grep !$_->{disabled}, @{$self->{menu}};
+}
+
+sub other_possible_values
+{
+ my $self = shift;
+ map $_->{value}, grep !$_->{seen} && !$_->{disabled}, @{$self->{menu}};
+}
+
+sub value_names {
+ my $self = shift;
+ my @names;
+ for (@{$self->{menu}}) {
+ my $n = $_->{name};
+ $n = $_->{value} unless defined $n;
+ push(@names, $n);
+ }
+ @names;
+}
+
+
+#---------------------------------------------------
+package HTML::Form::SubmitInput;
+@HTML::Form::SubmitInput::ISA=qw(HTML::Form::Input);
+
+#input/image
+#input/submit
+
+=item $input->click($form, $x, $y)
+
+Some input types (currently "submit" buttons and "images") can be
+clicked to submit the form. The click() method returns the
+corresponding C<HTTP::Request> object.
+
+=cut
+
+sub click
+{
+ my($self,$form,$x,$y) = @_;
+ for ($x, $y) { $_ = 1 unless defined; }
+ local($self->{clicked}) = [$x,$y];
+ return $form->make_request;
+}
+
+sub form_name_value
+{
+ my $self = shift;
+ return unless $self->{clicked};
+ return $self->SUPER::form_name_value(@_);
+}
+
+
+#---------------------------------------------------
+package HTML::Form::ImageInput;
+@HTML::Form::ImageInput::ISA=qw(HTML::Form::SubmitInput);
+
+sub form_name_value
+{
+ my $self = shift;
+ my $clicked = $self->{clicked};
+ return unless $clicked;
+ return if $self->{disabled};
+ my $name = $self->{name};
+ $name = (defined($name) && length($name)) ? "$name." : "";
+ return ("${name}x" => $clicked->[0],
+ "${name}y" => $clicked->[1]
+ );
+}
+
+#---------------------------------------------------
+package HTML::Form::FileInput;
+@HTML::Form::FileInput::ISA=qw(HTML::Form::TextInput);
+
+=back
+
+If the input is of type C<file>, then it has these additional methods:
+
+=over 4
+
+=item $input->file
+
+This is just an alias for the value() method. It sets the filename to
+read data from.
+
+For security reasons this field will never be initialized from the parsing
+of a form. This prevents the server from triggering stealth uploads of
+arbitrary files from the client machine.
+
+=cut
+
+sub file {
+ my $self = shift;
+ $self->value(@_);
+}
+
+=item $filename = $input->filename
+
+=item $input->filename( $new_filename )
+
+This get/sets the filename reported to the server during file upload.
+This attribute defaults to the value reported by the file() method.
+
+=cut
+
+sub filename {
+ my $self = shift;
+ my $old = $self->{filename};
+ $self->{filename} = shift if @_;
+ $old = $self->file unless defined $old;
+ $old;
+}
+
+=item $content = $input->content
+
+=item $input->content( $new_content )
+
+This get/sets the file content provided to the server during file
+upload. This method can be used if you do not want the content to be
+read from an actual file.
+
+=cut
+
+sub content {
+ my $self = shift;
+ my $old = $self->{content};
+ $self->{content} = shift if @_;
+ $old;
+}
+
+=item @headers = $input->headers
+
+=item input->headers($key => $value, .... )
+
+This get/set additional header fields describing the file uploaded.
+This can for instance be used to set the C<Content-Type> reported for
+the file.
+
+=cut
+
+sub headers {
+ my $self = shift;
+ my $old = $self->{headers} || [];
+ $self->{headers} = [@_] if @_;
+ @$old;
+}
+
+sub form_name_value {
+ my($self, $form) = @_;
+ return $self->SUPER::form_name_value($form)
+ if $form->method ne "POST" ||
+ $form->enctype ne "multipart/form-data";
+
+ my $name = $self->name;
+ return unless defined $name;
+ return if $self->{disabled};
+
+ my $file = $self->file;
+ my $filename = $self->filename;
+ my @headers = $self->headers;
+ my $content = $self->content;
+ if (defined $content) {
+ $filename = $file unless defined $filename;
+ $file = undef;
+ unshift(@headers, "Content" => $content);
+ }
+ elsif (!defined($file) || length($file) == 0) {
+ return;
+ }
+
+ # legacy (this used to be the way to do it)
+ if (ref($file) eq "ARRAY") {
+ my $f = shift @$file;
+ my $fn = shift @$file;
+ push(@headers, @$file);
+ $file = $f;
+ $filename = $fn unless defined $filename;
+ }
+
+ return ($name => [$file, $filename, @headers]);
+}
+
+package HTML::Form::KeygenInput;
+@HTML::Form::KeygenInput::ISA=qw(HTML::Form::Input);
+
+sub challenge {
+ my $self = shift;
+ return $self->{challenge};
+}
+
+sub keytype {
+ my $self = shift;
+ return lc($self->{keytype} || 'rsa');
+}
+
+1;
+
+__END__
+
+=back
+
+=head1 SEE ALSO
+
+L<LWP>, L<LWP::UserAgent>, L<HTML::Parser>
+
+=head1 COPYRIGHT
+
+Copyright 1998-2008 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
--- /dev/null
+package HTTP::Config;
+
+use strict;
+use URI;
+use vars qw($VERSION);
+
+$VERSION = "5.835";
+
+sub new {
+ my $class = shift;
+ return bless [], $class;
+}
+
+sub entries {
+ my $self = shift;
+ @$self;
+}
+
+sub empty {
+ my $self = shift;
+ not @$self;
+}
+
+sub add {
+ if (@_ == 2) {
+ my $self = shift;
+ push(@$self, shift);
+ return;
+ }
+ my($self, %spec) = @_;
+ push(@$self, \%spec);
+ return;
+}
+
+sub find2 {
+ my($self, %spec) = @_;
+ my @found;
+ my @rest;
+ ITEM:
+ for my $item (@$self) {
+ for my $k (keys %spec) {
+ if (!exists $item->{$k} || $spec{$k} ne $item->{$k}) {
+ push(@rest, $item);
+ next ITEM;
+ }
+ }
+ push(@found, $item);
+ }
+ return \@found unless wantarray;
+ return \@found, \@rest;
+}
+
+sub find {
+ my $self = shift;
+ my $f = $self->find2(@_);
+ return @$f if wantarray;
+ return $f->[0];
+}
+
+sub remove {
+ my($self, %spec) = @_;
+ my($removed, $rest) = $self->find2(%spec);
+ @$self = @$rest if @$removed;
+ return @$removed;
+}
+
+my %MATCH = (
+ m_scheme => sub {
+ my($v, $uri) = @_;
+ return $uri->_scheme eq $v; # URI known to be canonical
+ },
+ m_secure => sub {
+ my($v, $uri) = @_;
+ my $secure = $uri->can("secure") ? $uri->secure : $uri->_scheme eq "https";
+ return $secure == !!$v;
+ },
+ m_host_port => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("host_port");
+ return $uri->host_port eq $v, 7;
+ },
+ m_host => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("host");
+ return $uri->host eq $v, 6;
+ },
+ m_port => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("port");
+ return $uri->port eq $v;
+ },
+ m_domain => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("host");
+ my $h = $uri->host;
+ $h = "$h.local" unless $h =~ /\./;
+ $v = ".$v" unless $v =~ /^\./;
+ return length($v), 5 if substr($h, -length($v)) eq $v;
+ return 0;
+ },
+ m_path => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("path");
+ return $uri->path eq $v, 4;
+ },
+ m_path_prefix => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("path");
+ my $path = $uri->path;
+ my $len = length($v);
+ return $len, 3 if $path eq $v;
+ return 0 if length($path) <= $len;
+ $v .= "/" unless $v =~ m,/\z,,;
+ return $len, 3 if substr($path, 0, length($v)) eq $v;
+ return 0;
+ },
+ m_path_match => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("path");
+ return $uri->path =~ $v;
+ },
+ m_uri__ => sub {
+ my($v, $k, $uri) = @_;
+ return unless $uri->can($k);
+ return 1 unless defined $v;
+ return $uri->$k eq $v;
+ },
+ m_method => sub {
+ my($v, $uri, $request) = @_;
+ return $request && $request->method eq $v;
+ },
+ m_proxy => sub {
+ my($v, $uri, $request) = @_;
+ return $request && ($request->{proxy} || "") eq $v;
+ },
+ m_code => sub {
+ my($v, $uri, $request, $response) = @_;
+ $v =~ s/xx\z//;
+ return unless $response;
+ return length($v), 2 if substr($response->code, 0, length($v)) eq $v;
+ },
+ m_media_type => sub { # for request too??
+ my($v, $uri, $request, $response) = @_;
+ return unless $response;
+ return 1, 1 if $v eq "*/*";
+ my $ct = $response->content_type;
+ return 2, 1 if $v =~ s,/\*\z,, && $ct =~ m,^\Q$v\E/,;
+ return 3, 1 if $v eq "html" && $response->content_is_html;
+ return 4, 1 if $v eq "xhtml" && $response->content_is_xhtml;
+ return 10, 1 if $v eq $ct;
+ return 0;
+ },
+ m_header__ => sub {
+ my($v, $k, $uri, $request, $response) = @_;
+ return unless $request;
+ return 1 if $request->header($k) eq $v;
+ return 1 if $response && $response->header($k) eq $v;
+ return 0;
+ },
+ m_response_attr__ => sub {
+ my($v, $k, $uri, $request, $response) = @_;
+ return unless $response;
+ return 1 if !defined($v) && exists $response->{$k};
+ return 0 unless exists $response->{$k};
+ return 1 if $response->{$k} eq $v;
+ return 0;
+ },
+);
+
+sub matching {
+ my $self = shift;
+ if (@_ == 1) {
+ if ($_[0]->can("request")) {
+ unshift(@_, $_[0]->request);
+ unshift(@_, undef) unless defined $_[0];
+ }
+ unshift(@_, $_[0]->uri_canonical) if $_[0] && $_[0]->can("uri_canonical");
+ }
+ my($uri, $request, $response) = @_;
+ $uri = URI->new($uri) unless ref($uri);
+
+ my @m;
+ ITEM:
+ for my $item (@$self) {
+ my $order;
+ for my $ikey (keys %$item) {
+ my $mkey = $ikey;
+ my $k;
+ $k = $1 if $mkey =~ s/__(.*)/__/;
+ if (my $m = $MATCH{$mkey}) {
+ #print "$ikey $mkey\n";
+ my($c, $o);
+ my @arg = (
+ defined($k) ? $k : (),
+ $uri, $request, $response
+ );
+ my $v = $item->{$ikey};
+ $v = [$v] unless ref($v) eq "ARRAY";
+ for (@$v) {
+ ($c, $o) = $m->($_, @arg);
+ #print " - $_ ==> $c $o\n";
+ last if $c;
+ }
+ next ITEM unless $c;
+ $order->[$o || 0] += $c;
+ }
+ }
+ $order->[7] ||= 0;
+ $item->{_order} = join(".", reverse map sprintf("%03d", $_ || 0), @$order);
+ push(@m, $item);
+ }
+ @m = sort { $b->{_order} cmp $a->{_order} } @m;
+ delete $_->{_order} for @m;
+ return @m if wantarray;
+ return $m[0];
+}
+
+sub add_item {
+ my $self = shift;
+ my $item = shift;
+ return $self->add(item => $item, @_);
+}
+
+sub remove_items {
+ my $self = shift;
+ return map $_->{item}, $self->remove(@_);
+}
+
+sub matching_items {
+ my $self = shift;
+ return map $_->{item}, $self->matching(@_);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Config - Configuration for request and response objects
+
+=head1 SYNOPSIS
+
+ use HTTP::Config;
+ my $c = HTTP::Config->new;
+ $c->add(m_domain => ".example.com", m_scheme => "http", verbose => 1);
+
+ use HTTP::Request;
+ my $request = HTTP::Request->new(GET => "http://www.example.com");
+
+ if (my @m = $c->matching($request)) {
+ print "Yadayada\n" if $m[0]->{verbose};
+ }
+
+=head1 DESCRIPTION
+
+An C<HTTP::Config> object is a list of entries that
+can be matched against request or request/response pairs. Its
+purpose is to hold configuration data that can be looked up given a
+request or response object.
+
+Each configuration entry is a hash. Some keys specify matching to
+occur against attributes of request/response objects. Other keys can
+be used to hold user data.
+
+The following methods are provided:
+
+=over 4
+
+=item $conf = HTTP::Config->new
+
+Constructs a new empty C<HTTP::Config> object and returns it.
+
+=item $conf->entries
+
+Returns the list of entries in the configuration object.
+In scalar context returns the number of entries.
+
+=item $conf->empty
+
+Return true if there are no entries in the configuration object.
+This is just a shorthand for C<< not $conf->entries >>.
+
+=item $conf->add( %matchspec, %other )
+
+=item $conf->add( \%entry )
+
+Adds a new entry to the configuration.
+You can either pass separate key/value pairs or a hash reference.
+
+=item $conf->remove( %spec )
+
+Removes (and returns) the entries that have matches for all the key/value pairs in %spec.
+If %spec is empty this will match all entries; so it will empty the configuation object.
+
+=item $conf->matching( $uri, $request, $response )
+
+=item $conf->matching( $uri )
+
+=item $conf->matching( $request )
+
+=item $conf->matching( $response )
+
+Returns the entries that match the given $uri, $request and $response triplet.
+
+If called with a single $request object then the $uri is obtained by calling its 'uri_canonical' method.
+If called with a single $response object, then the request object is obtained by calling its 'request' method;
+and then the $uri is obtained as if a single $request was provided.
+
+The entries are returned with the most specific matches first.
+In scalar context returns the most specific match or C<undef> in none match.
+
+=item $conf->add_item( $item, %matchspec )
+
+=item $conf->remove_items( %spec )
+
+=item $conf->matching_items( $uri, $request, $response )
+
+Wrappers that hides the entries themselves.
+
+=back
+
+=head2 Matching
+
+The following keys on a configuration entry specify matching. For all
+of these you can provide an array of values instead of a single value.
+The entry matches if at least one of the values in the array matches.
+
+Entries that require match against a response object attribute will never match
+unless a response object was provided.
+
+=over
+
+=item m_scheme => $scheme
+
+Matches if the URI uses the specified scheme; e.g. "http".
+
+=item m_secure => $bool
+
+If $bool is TRUE; matches if the URI uses a secure scheme. If $bool
+is FALSE; matches if the URI does not use a secure scheme. An example
+of a secure scheme is "https".
+
+=item m_host_port => "$hostname:$port"
+
+Matches if the URI's host_port method return the specified value.
+
+=item m_host => $hostname
+
+Matches if the URI's host method returns the specified value.
+
+=item m_port => $port
+
+Matches if the URI's port method returns the specified value.
+
+=item m_domain => ".$domain"
+
+Matches if the URI's host method return a value that within the given
+domain. The hostname "www.example.com" will for instance match the
+domain ".com".
+
+=item m_path => $path
+
+Matches if the URI's path method returns the specified value.
+
+=item m_path_prefix => $path
+
+Matches if the URI's path is the specified path or has the specified
+path as prefix.
+
+=item m_path_match => $Regexp
+
+Matches if the regular expression matches the URI's path. Eg. qr/\.html$/.
+
+=item m_method => $method
+
+Matches if the request method matches the specified value. Eg. "GET" or "POST".
+
+=item m_code => $digit
+
+=item m_code => $status_code
+
+Matches if the response status code matches. If a single digit is
+specified; matches for all response status codes beginning with that digit.
+
+=item m_proxy => $url
+
+Matches if the request is to be sent to the given Proxy server.
+
+=item m_media_type => "*/*"
+
+=item m_media_type => "text/*"
+
+=item m_media_type => "html"
+
+=item m_media_type => "xhtml"
+
+=item m_media_type => "text/html"
+
+Matches if the response media type matches.
+
+With a value of "html" matches if $response->content_is_html returns TRUE.
+With a value of "xhtml" matches if $response->content_is_xhtml returns TRUE.
+
+=item m_uri__I<$method> => undef
+
+Matches if the URI object provides the method.
+
+=item m_uri__I<$method> => $string
+
+Matches if the URI's $method method returns the given value.
+
+=item m_header__I<$field> => $string
+
+Matches if either the request or the response have a header $field with the given value.
+
+=item m_response_attr__I<$key> => undef
+
+=item m_response_attr__I<$key> => $string
+
+Matches if the response object has that key, or the entry has the given value.
+
+=back
+
+=head1 SEE ALSO
+
+L<URI>, L<HTTP::Request>, L<HTTP::Response>
+
+=head1 COPYRIGHT
+
+Copyright 2008, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
--- /dev/null
+package HTTP::Cookies;
+
+use strict;
+use HTTP::Date qw(str2time parse_date time2str);
+use HTTP::Headers::Util qw(_split_header_words join_header_words);
+
+use vars qw($VERSION $EPOCH_OFFSET);
+$VERSION = "5.833";
+
+# Legacy: because "use "HTTP::Cookies" used be the ONLY way
+# to load the class HTTP::Cookies::Netscape.
+require HTTP::Cookies::Netscape;
+
+$EPOCH_OFFSET = 0; # difference from Unix epoch
+if ($^O eq "MacOS") {
+ require Time::Local;
+ $EPOCH_OFFSET = Time::Local::timelocal(0,0,0,1,0,70);
+}
+
+# A HTTP::Cookies object is a hash. The main attribute is the
+# COOKIES 3 level hash: $self->{COOKIES}{$domain}{$path}{$key}.
+
+sub new
+{
+ my $class = shift;
+ my $self = bless {
+ COOKIES => {},
+ }, $class;
+ my %cnf = @_;
+ for (keys %cnf) {
+ $self->{lc($_)} = $cnf{$_};
+ }
+ $self->load;
+ $self;
+}
+
+
+sub add_cookie_header
+{
+ my $self = shift;
+ my $request = shift || return;
+ my $url = $request->uri;
+ my $scheme = $url->scheme;
+ unless ($scheme =~ /^https?\z/) {
+ return;
+ }
+
+ my $domain = _host($request, $url);
+ $domain = "$domain.local" unless $domain =~ /\./;
+ my $secure_request = ($scheme eq "https");
+ my $req_path = _url_path($url);
+ my $req_port = $url->port;
+ my $now = time();
+ _normalize_path($req_path) if $req_path =~ /%/;
+
+ my @cval; # cookie values for the "Cookie" header
+ my $set_ver;
+ my $netscape_only = 0; # An exact domain match applies to any cookie
+
+ while ($domain =~ /\./) {
+ # Checking $domain for cookies"
+ my $cookies = $self->{COOKIES}{$domain};
+ next unless $cookies;
+ if ($self->{delayload} && defined($cookies->{'//+delayload'})) {
+ my $cookie_data = $cookies->{'//+delayload'}{'cookie'};
+ delete $self->{COOKIES}{$domain};
+ $self->load_cookie($cookie_data->[1]);
+ $cookies = $self->{COOKIES}{$domain};
+ next unless $cookies; # should not really happen
+ }
+
+ # Want to add cookies corresponding to the most specific paths
+ # first (i.e. longest path first)
+ my $path;
+ for $path (sort {length($b) <=> length($a) } keys %$cookies) {
+ if (index($req_path, $path) != 0) {
+ next;
+ }
+
+ my($key,$array);
+ while (($key,$array) = each %{$cookies->{$path}}) {
+ my($version,$val,$port,$path_spec,$secure,$expires) = @$array;
+ if ($secure && !$secure_request) {
+ next;
+ }
+ if ($expires && $expires < $now) {
+ next;
+ }
+ if ($port) {
+ my $found;
+ if ($port =~ s/^_//) {
+ # The corresponding Set-Cookie attribute was empty
+ $found++ if $port eq $req_port;
+ $port = "";
+ }
+ else {
+ my $p;
+ for $p (split(/,/, $port)) {
+ $found++, last if $p eq $req_port;
+ }
+ }
+ unless ($found) {
+ next;
+ }
+ }
+ if ($version > 0 && $netscape_only) {
+ next;
+ }
+
+ # set version number of cookie header.
+ # XXX: What should it be if multiple matching
+ # Set-Cookie headers have different versions themselves
+ if (!$set_ver++) {
+ if ($version >= 1) {
+ push(@cval, "\$Version=$version");
+ }
+ elsif (!$self->{hide_cookie2}) {
+ $request->header(Cookie2 => '$Version="1"');
+ }
+ }
+
+ # do we need to quote the value
+ if ($val =~ /\W/ && $version) {
+ $val =~ s/([\\\"])/\\$1/g;
+ $val = qq("$val");
+ }
+
+ # and finally remember this cookie
+ push(@cval, "$key=$val");
+ if ($version >= 1) {
+ push(@cval, qq(\$Path="$path")) if $path_spec;
+ push(@cval, qq(\$Domain="$domain")) if $domain =~ /^\./;
+ if (defined $port) {
+ my $p = '$Port';
+ $p .= qq(="$port") if length $port;
+ push(@cval, $p);
+ }
+ }
+
+ }
+ }
+
+ } continue {
+ # Try with a more general domain, alternately stripping
+ # leading name components and leading dots. When this
+ # results in a domain with no leading dot, it is for
+ # Netscape cookie compatibility only:
+ #
+ # a.b.c.net Any cookie
+ # .b.c.net Any cookie
+ # b.c.net Netscape cookie only
+ # .c.net Any cookie
+
+ if ($domain =~ s/^\.+//) {
+ $netscape_only = 1;
+ }
+ else {
+ $domain =~ s/[^.]*//;
+ $netscape_only = 0;
+ }
+ }
+
+ if (@cval) {
+ if (my $old = $request->header("Cookie")) {
+ unshift(@cval, $old);
+ }
+ $request->header(Cookie => join("; ", @cval));
+ }
+
+ $request;
+}
+
+
+sub extract_cookies
+{
+ my $self = shift;
+ my $response = shift || return;
+
+ my @set = _split_header_words($response->_header("Set-Cookie2"));
+ my @ns_set = $response->_header("Set-Cookie");
+
+ return $response unless @set || @ns_set; # quick exit
+
+ my $request = $response->request;
+ my $url = $request->uri;
+ my $req_host = _host($request, $url);
+ $req_host = "$req_host.local" unless $req_host =~ /\./;
+ my $req_port = $url->port;
+ my $req_path = _url_path($url);
+ _normalize_path($req_path) if $req_path =~ /%/;
+
+ if (@ns_set) {
+ # The old Netscape cookie format for Set-Cookie
+ # http://curl.haxx.se/rfc/cookie_spec.html
+ # can for instance contain an unquoted "," in the expires
+ # field, so we have to use this ad-hoc parser.
+ my $now = time();
+
+ # Build a hash of cookies that was present in Set-Cookie2
+ # headers. We need to skip them if we also find them in a
+ # Set-Cookie header.
+ my %in_set2;
+ for (@set) {
+ $in_set2{$_->[0]}++;
+ }
+
+ my $set;
+ for $set (@ns_set) {
+ $set =~ s/^\s+//;
+ my @cur;
+ my $param;
+ my $expires;
+ my $first_param = 1;
+ for $param (split(/;\s*/, $set)) {
+ next unless length($param);
+ my($k,$v) = split(/\s*=\s*/, $param, 2);
+ if (defined $v) {
+ $v =~ s/\s+$//;
+ #print "$k => $v\n";
+ }
+ else {
+ $k =~ s/\s+$//;
+ #print "$k => undef";
+ }
+ if (!$first_param && lc($k) eq "expires") {
+ my $etime = str2time($v);
+ if (defined $etime) {
+ push(@cur, "Max-Age" => $etime - $now);
+ $expires++;
+ }
+ else {
+ # parse_date can deal with years outside the range of time_t,
+ my($year, $mon, $day, $hour, $min, $sec, $tz) = parse_date($v);
+ if ($year) {
+ my $thisyear = (gmtime)[5] + 1900;
+ if ($year < $thisyear) {
+ push(@cur, "Max-Age" => -1); # any negative value will do
+ $expires++;
+ }
+ elsif ($year >= $thisyear + 10) {
+ # the date is at least 10 years into the future, just replace
+ # it with something approximate
+ push(@cur, "Max-Age" => 10 * 365 * 24 * 60 * 60);
+ $expires++;
+ }
+ }
+ }
+ }
+ elsif (!$first_param && lc($k) =~ /^(?:version|discard|ns-cookie)/) {
+ # ignore
+ }
+ else {
+ push(@cur, $k => $v);
+ }
+ $first_param = 0;
+ }
+ next unless @cur;
+ next if $in_set2{$cur[0]};
+
+# push(@cur, "Port" => $req_port);
+ push(@cur, "Discard" => undef) unless $expires;
+ push(@cur, "Version" => 0);
+ push(@cur, "ns-cookie" => 1);
+ push(@set, \@cur);
+ }
+ }
+
+ SET_COOKIE:
+ for my $set (@set) {
+ next unless @$set >= 2;
+
+ my $key = shift @$set;
+ my $val = shift @$set;
+
+ my %hash;
+ while (@$set) {
+ my $k = shift @$set;
+ my $v = shift @$set;
+ my $lc = lc($k);
+ # don't loose case distinction for unknown fields
+ $k = $lc if $lc =~ /^(?:discard|domain|max-age|
+ path|port|secure|version)$/x;
+ if ($k eq "discard" || $k eq "secure") {
+ $v = 1 unless defined $v;
+ }
+ next if exists $hash{$k}; # only first value is significant
+ $hash{$k} = $v;
+ };
+
+ my %orig_hash = %hash;
+ my $version = delete $hash{version};
+ $version = 1 unless defined($version);
+ my $discard = delete $hash{discard};
+ my $secure = delete $hash{secure};
+ my $maxage = delete $hash{'max-age'};
+ my $ns_cookie = delete $hash{'ns-cookie'};
+
+ # Check domain
+ my $domain = delete $hash{domain};
+ $domain = lc($domain) if defined $domain;
+ if (defined($domain)
+ && $domain ne $req_host && $domain ne ".$req_host") {
+ if ($domain !~ /\./ && $domain ne "local") {
+ next SET_COOKIE;
+ }
+ $domain = ".$domain" unless $domain =~ /^\./;
+ if ($domain =~ /\.\d+$/) {
+ next SET_COOKIE;
+ }
+ my $len = length($domain);
+ unless (substr($req_host, -$len) eq $domain) {
+ next SET_COOKIE;
+ }
+ my $hostpre = substr($req_host, 0, length($req_host) - $len);
+ if ($hostpre =~ /\./ && !$ns_cookie) {
+ next SET_COOKIE;
+ }
+ }
+ else {
+ $domain = $req_host;
+ }
+
+ my $path = delete $hash{path};
+ my $path_spec;
+ if (defined $path && $path ne '') {
+ $path_spec++;
+ _normalize_path($path) if $path =~ /%/;
+ if (!$ns_cookie &&
+ substr($req_path, 0, length($path)) ne $path) {
+ next SET_COOKIE;
+ }
+ }
+ else {
+ $path = $req_path;
+ $path =~ s,/[^/]*$,,;
+ $path = "/" unless length($path);
+ }
+
+ my $port;
+ if (exists $hash{port}) {
+ $port = delete $hash{port};
+ if (defined $port) {
+ $port =~ s/\s+//g;
+ my $found;
+ for my $p (split(/,/, $port)) {
+ unless ($p =~ /^\d+$/) {
+ next SET_COOKIE;
+ }
+ $found++ if $p eq $req_port;
+ }
+ unless ($found) {
+ next SET_COOKIE;
+ }
+ }
+ else {
+ $port = "_$req_port";
+ }
+ }
+ $self->set_cookie($version,$key,$val,$path,$domain,$port,$path_spec,$secure,$maxage,$discard, \%hash)
+ if $self->set_cookie_ok(\%orig_hash);
+ }
+
+ $response;
+}
+
+sub set_cookie_ok
+{
+ 1;
+}
+
+
+sub set_cookie
+{
+ my $self = shift;
+ my($version,
+ $key, $val, $path, $domain, $port,
+ $path_spec, $secure, $maxage, $discard, $rest) = @_;
+
+ # path and key can not be empty (key can't start with '$')
+ return $self if !defined($path) || $path !~ m,^/, ||
+ !defined($key) || $key =~ m,^\$,;
+
+ # ensure legal port
+ if (defined $port) {
+ return $self unless $port =~ /^_?\d+(?:,\d+)*$/;
+ }
+
+ my $expires;
+ if (defined $maxage) {
+ if ($maxage <= 0) {
+ delete $self->{COOKIES}{$domain}{$path}{$key};
+ return $self;
+ }
+ $expires = time() + $maxage;
+ }
+ $version = 0 unless defined $version;
+
+ my @array = ($version, $val,$port,
+ $path_spec,
+ $secure, $expires, $discard);
+ push(@array, {%$rest}) if defined($rest) && %$rest;
+ # trim off undefined values at end
+ pop(@array) while !defined $array[-1];
+
+ $self->{COOKIES}{$domain}{$path}{$key} = \@array;
+ $self;
+}
+
+
+sub save
+{
+ my $self = shift;
+ my $file = shift || $self->{'file'} || return;
+ local(*FILE);
+ open(FILE, ">$file") or die "Can't open $file: $!";
+ print FILE "#LWP-Cookies-1.0\n";
+ print FILE $self->as_string(!$self->{ignore_discard});
+ close(FILE);
+ 1;
+}
+
+
+sub load
+{
+ my $self = shift;
+ my $file = shift || $self->{'file'} || return;
+ local(*FILE, $_);
+ local $/ = "\n"; # make sure we got standard record separator
+ open(FILE, $file) or return;
+ my $magic = <FILE>;
+ unless ($magic =~ /^\#LWP-Cookies-(\d+\.\d+)/) {
+ warn "$file does not seem to contain cookies";
+ return;
+ }
+ while (<FILE>) {
+ next unless s/^Set-Cookie3:\s*//;
+ chomp;
+ my $cookie;
+ for $cookie (_split_header_words($_)) {
+ my($key,$val) = splice(@$cookie, 0, 2);
+ my %hash;
+ while (@$cookie) {
+ my $k = shift @$cookie;
+ my $v = shift @$cookie;
+ $hash{$k} = $v;
+ }
+ my $version = delete $hash{version};
+ my $path = delete $hash{path};
+ my $domain = delete $hash{domain};
+ my $port = delete $hash{port};
+ my $expires = str2time(delete $hash{expires});
+
+ my $path_spec = exists $hash{path_spec}; delete $hash{path_spec};
+ my $secure = exists $hash{secure}; delete $hash{secure};
+ my $discard = exists $hash{discard}; delete $hash{discard};
+
+ my @array = ($version,$val,$port,
+ $path_spec,$secure,$expires,$discard);
+ push(@array, \%hash) if %hash;
+ $self->{COOKIES}{$domain}{$path}{$key} = \@array;
+ }
+ }
+ close(FILE);
+ 1;
+}
+
+
+sub revert
+{
+ my $self = shift;
+ $self->clear->load;
+ $self;
+}
+
+
+sub clear
+{
+ my $self = shift;
+ if (@_ == 0) {
+ $self->{COOKIES} = {};
+ }
+ elsif (@_ == 1) {
+ delete $self->{COOKIES}{$_[0]};
+ }
+ elsif (@_ == 2) {
+ delete $self->{COOKIES}{$_[0]}{$_[1]};
+ }
+ elsif (@_ == 3) {
+ delete $self->{COOKIES}{$_[0]}{$_[1]}{$_[2]};
+ }
+ else {
+ require Carp;
+ Carp::carp('Usage: $c->clear([domain [,path [,key]]])');
+ }
+ $self;
+}
+
+
+sub clear_temporary_cookies
+{
+ my($self) = @_;
+
+ $self->scan(sub {
+ if($_[9] or # "Discard" flag set
+ not $_[8]) { # No expire field?
+ $_[8] = -1; # Set the expire/max_age field
+ $self->set_cookie(@_); # Clear the cookie
+ }
+ });
+}
+
+
+sub DESTROY
+{
+ my $self = shift;
+ local($., $@, $!, $^E, $?);
+ $self->save if $self->{'autosave'};
+}
+
+
+sub scan
+{
+ my($self, $cb) = @_;
+ my($domain,$path,$key);
+ for $domain (sort keys %{$self->{COOKIES}}) {
+ for $path (sort keys %{$self->{COOKIES}{$domain}}) {
+ for $key (sort keys %{$self->{COOKIES}{$domain}{$path}}) {
+ my($version,$val,$port,$path_spec,
+ $secure,$expires,$discard,$rest) =
+ @{$self->{COOKIES}{$domain}{$path}{$key}};
+ $rest = {} unless defined($rest);
+ &$cb($version,$key,$val,$path,$domain,$port,
+ $path_spec,$secure,$expires,$discard,$rest);
+ }
+ }
+ }
+}
+
+
+sub as_string
+{
+ my($self, $skip_discard) = @_;
+ my @res;
+ $self->scan(sub {
+ my($version,$key,$val,$path,$domain,$port,
+ $path_spec,$secure,$expires,$discard,$rest) = @_;
+ return if $discard && $skip_discard;
+ my @h = ($key, $val);
+ push(@h, "path", $path);
+ push(@h, "domain" => $domain);
+ push(@h, "port" => $port) if defined $port;
+ push(@h, "path_spec" => undef) if $path_spec;
+ push(@h, "secure" => undef) if $secure;
+ push(@h, "expires" => HTTP::Date::time2isoz($expires)) if $expires;
+ push(@h, "discard" => undef) if $discard;
+ my $k;
+ for $k (sort keys %$rest) {
+ push(@h, $k, $rest->{$k});
+ }
+ push(@h, "version" => $version);
+ push(@res, "Set-Cookie3: " . join_header_words(\@h));
+ });
+ join("\n", @res, "");
+}
+
+sub _host
+{
+ my($request, $url) = @_;
+ if (my $h = $request->header("Host")) {
+ $h =~ s/:\d+$//; # might have a port as well
+ return lc($h);
+ }
+ return lc($url->host);
+}
+
+sub _url_path
+{
+ my $url = shift;
+ my $path;
+ if($url->can('epath')) {
+ $path = $url->epath; # URI::URL method
+ }
+ else {
+ $path = $url->path; # URI::_generic method
+ }
+ $path = "/" unless length $path;
+ $path;
+}
+
+sub _normalize_path # so that plain string compare can be used
+{
+ my $x;
+ $_[0] =~ s/%([0-9a-fA-F][0-9a-fA-F])/
+ $x = uc($1);
+ $x eq "2F" || $x eq "25" ? "%$x" :
+ pack("C", hex($x));
+ /eg;
+ $_[0] =~ s/([\0-\x20\x7f-\xff])/sprintf("%%%02X",ord($1))/eg;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Cookies - HTTP cookie jars
+
+=head1 SYNOPSIS
+
+ use HTTP::Cookies;
+ $cookie_jar = HTTP::Cookies->new(
+ file => "$ENV{'HOME'}/lwp_cookies.dat',
+ autosave => 1,
+ );
+
+ use LWP;
+ my $browser = LWP::UserAgent->new;
+ $browser->cookie_jar($cookie_jar);
+
+Or for an empty and temporary cookie jar:
+
+ use LWP;
+ my $browser = LWP::UserAgent->new;
+ $browser->cookie_jar( {} );
+
+=head1 DESCRIPTION
+
+This class is for objects that represent a "cookie jar" -- that is, a
+database of all the HTTP cookies that a given LWP::UserAgent object
+knows about.
+
+Cookies are a general mechanism which server side connections can use
+to both store and retrieve information on the client side of the
+connection. For more information about cookies refer to
+<URL:http://curl.haxx.se/rfc/cookie_spec.html> and
+<URL:http://www.cookiecentral.com/>. This module also implements the
+new style cookies described in I<RFC 2965>.
+The two variants of cookies are supposed to be able to coexist happily.
+
+Instances of the class I<HTTP::Cookies> are able to store a collection
+of Set-Cookie2: and Set-Cookie: headers and are able to use this
+information to initialize Cookie-headers in I<HTTP::Request> objects.
+The state of a I<HTTP::Cookies> object can be saved in and restored from
+files.
+
+=head1 METHODS
+
+The following methods are provided:
+
+=over 4
+
+=item $cookie_jar = HTTP::Cookies->new
+
+The constructor takes hash style parameters. The following
+parameters are recognized:
+
+ file: name of the file to restore cookies from and save cookies to
+ autosave: save during destruction (bool)
+ ignore_discard: save even cookies that are requested to be discarded (bool)
+ hide_cookie2: do not add Cookie2 header to requests
+
+Future parameters might include (not yet implemented):
+
+ max_cookies 300
+ max_cookies_per_domain 20
+ max_cookie_size 4096
+
+ no_cookies list of domain names that we never return cookies to
+
+=item $cookie_jar->add_cookie_header( $request )
+
+The add_cookie_header() method will set the appropriate Cookie:-header
+for the I<HTTP::Request> object given as argument. The $request must
+have a valid url attribute before this method is called.
+
+=item $cookie_jar->extract_cookies( $response )
+
+The extract_cookies() method will look for Set-Cookie: and
+Set-Cookie2: headers in the I<HTTP::Response> object passed as
+argument. Any of these headers that are found are used to update
+the state of the $cookie_jar.
+
+=item $cookie_jar->set_cookie( $version, $key, $val, $path, $domain, $port, $path_spec, $secure, $maxage, $discard, \%rest )
+
+The set_cookie() method updates the state of the $cookie_jar. The
+$key, $val, $domain, $port and $path arguments are strings. The
+$path_spec, $secure, $discard arguments are boolean values. The $maxage
+value is a number indicating number of seconds that this cookie will
+live. A value <= 0 will delete this cookie. %rest defines
+various other attributes like "Comment" and "CommentURL".
+
+=item $cookie_jar->save
+
+=item $cookie_jar->save( $file )
+
+This method file saves the state of the $cookie_jar to a file.
+The state can then be restored later using the load() method. If a
+filename is not specified we will use the name specified during
+construction. If the attribute I<ignore_discard> is set, then we
+will even save cookies that are marked to be discarded.
+
+The default is to save a sequence of "Set-Cookie3" lines.
+"Set-Cookie3" is a proprietary LWP format, not known to be compatible
+with any browser. The I<HTTP::Cookies::Netscape> sub-class can
+be used to save in a format compatible with Netscape.
+
+=item $cookie_jar->load
+
+=item $cookie_jar->load( $file )
+
+This method reads the cookies from the file and adds them to the
+$cookie_jar. The file must be in the format written by the save()
+method.
+
+=item $cookie_jar->revert
+
+This method empties the $cookie_jar and re-loads the $cookie_jar
+from the last save file.
+
+=item $cookie_jar->clear
+
+=item $cookie_jar->clear( $domain )
+
+=item $cookie_jar->clear( $domain, $path )
+
+=item $cookie_jar->clear( $domain, $path, $key )
+
+Invoking this method without arguments will empty the whole
+$cookie_jar. If given a single argument only cookies belonging to
+that domain will be removed. If given two arguments, cookies
+belonging to the specified path within that domain are removed. If
+given three arguments, then the cookie with the specified key, path
+and domain is removed.
+
+=item $cookie_jar->clear_temporary_cookies
+
+Discard all temporary cookies. Scans for all cookies in the jar
+with either no expire field or a true C<discard> flag. To be
+called when the user agent shuts down according to RFC 2965.
+
+=item $cookie_jar->scan( \&callback )
+
+The argument is a subroutine that will be invoked for each cookie
+stored in the $cookie_jar. The subroutine will be invoked with
+the following arguments:
+
+ 0 version
+ 1 key
+ 2 val
+ 3 path
+ 4 domain
+ 5 port
+ 6 path_spec
+ 7 secure
+ 8 expires
+ 9 discard
+ 10 hash
+
+=item $cookie_jar->as_string
+
+=item $cookie_jar->as_string( $skip_discardables )
+
+The as_string() method will return the state of the $cookie_jar
+represented as a sequence of "Set-Cookie3" header lines separated by
+"\n". If $skip_discardables is TRUE, it will not return lines for
+cookies with the I<Discard> attribute.
+
+=back
+
+=head1 SEE ALSO
+
+L<HTTP::Cookies::Netscape>, L<HTTP::Cookies::Microsoft>
+
+=head1 COPYRIGHT
+
+Copyright 1997-2002 Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Cookies::Microsoft;
+
+use strict;
+
+use vars qw(@ISA $VERSION);
+
+$VERSION = "5.821";
+
+require HTTP::Cookies;
+@ISA=qw(HTTP::Cookies);
+
+sub load_cookies_from_file
+{
+ my ($file) = @_;
+ my @cookies;
+ my ($key, $value, $domain_path, $flags, $lo_expire, $hi_expire);
+ my ($lo_create, $hi_create, $sep);
+
+ open(COOKIES, $file) || return;
+
+ while ($key = <COOKIES>)
+ {
+ chomp($key);
+ chomp($value = <COOKIES>);
+ chomp($domain_path= <COOKIES>);
+ chomp($flags = <COOKIES>); # 0x0001 bit is for secure
+ chomp($lo_expire = <COOKIES>);
+ chomp($hi_expire = <COOKIES>);
+ chomp($lo_create = <COOKIES>);
+ chomp($hi_create = <COOKIES>);
+ chomp($sep = <COOKIES>);
+
+ if (!defined($key) || !defined($value) || !defined($domain_path) ||
+ !defined($flags) || !defined($hi_expire) || !defined($lo_expire) ||
+ !defined($hi_create) || !defined($lo_create) || !defined($sep) ||
+ ($sep ne '*'))
+ {
+ last;
+ }
+
+ if ($domain_path =~ /^([^\/]+)(\/.*)$/)
+ {
+ my $domain = $1;
+ my $path = $2;
+
+ push(@cookies, {KEY => $key, VALUE => $value, DOMAIN => $domain,
+ PATH => $path, FLAGS =>$flags, HIXP =>$hi_expire,
+ LOXP => $lo_expire, HICREATE => $hi_create,
+ LOCREATE => $lo_create});
+ }
+ }
+
+ return \@cookies;
+}
+
+sub get_user_name
+{
+ use Win32;
+ use locale;
+ my $user = lc(Win32::LoginName());
+
+ return $user;
+}
+
+# MSIE stores create and expire times as Win32 FILETIME,
+# which is 64 bits of 100 nanosecond intervals since Jan 01 1601
+#
+# But Cookies code expects time in 32-bit value expressed
+# in seconds since Jan 01 1970
+#
+sub epoch_time_offset_from_win32_filetime
+{
+ my ($high, $low) = @_;
+
+ #--------------------------------------------------------
+ # USEFUL CONSTANT
+ #--------------------------------------------------------
+ # 0x019db1de 0xd53e8000 is 1970 Jan 01 00:00:00 in Win32 FILETIME
+ #
+ # 100 nanosecond intervals == 0.1 microsecond intervals
+
+ my $filetime_low32_1970 = 0xd53e8000;
+ my $filetime_high32_1970 = 0x019db1de;
+
+ #------------------------------------
+ # ALGORITHM
+ #------------------------------------
+ # To go from 100 nanosecond intervals to seconds since 00:00 Jan 01 1970:
+ #
+ # 1. Adjust 100 nanosecond intervals to Jan 01 1970 base
+ # 2. Divide by 10 to get to microseconds (1/millionth second)
+ # 3. Divide by 1000000 (10 ^ 6) to get to seconds
+ #
+ # We can combine Step 2 & 3 into one divide.
+ #
+ # After much trial and error, I came up with the following code which
+ # avoids using Math::BigInt or floating pt, but still gives correct answers
+
+ # If the filetime is before the epoch, return 0
+ if (($high < $filetime_high32_1970) ||
+ (($high == $filetime_high32_1970) && ($low < $filetime_low32_1970)))
+ {
+ return 0;
+ }
+
+ # Can't multiply by 0x100000000, (1 << 32),
+ # without Perl issuing an integer overflow warning
+ #
+ # So use two multiplies by 0x10000 instead of one multiply by 0x100000000
+ #
+ # The result is the same.
+ #
+ my $date1970 = (($filetime_high32_1970 * 0x10000) * 0x10000) + $filetime_low32_1970;
+ my $time = (($high * 0x10000) * 0x10000) + $low;
+
+ $time -= $date1970;
+ $time /= 10000000;
+
+ return $time;
+}
+
+sub load_cookie
+{
+ my($self, $file) = @_;
+ my $now = time() - $HTTP::Cookies::EPOCH_OFFSET;
+ my $cookie_data;
+
+ if (-f $file)
+ {
+ # open the cookie file and get the data
+ $cookie_data = load_cookies_from_file($file);
+
+ foreach my $cookie (@{$cookie_data})
+ {
+ my $secure = ($cookie->{FLAGS} & 1) != 0;
+ my $expires = epoch_time_offset_from_win32_filetime($cookie->{HIXP}, $cookie->{LOXP});
+
+ $self->set_cookie(undef, $cookie->{KEY}, $cookie->{VALUE},
+ $cookie->{PATH}, $cookie->{DOMAIN}, undef,
+ 0, $secure, $expires-$now, 0);
+ }
+ }
+}
+
+sub load
+{
+ my($self, $cookie_index) = @_;
+ my $now = time() - $HTTP::Cookies::EPOCH_OFFSET;
+ my $cookie_dir = '';
+ my $delay_load = (defined($self->{'delayload'}) && $self->{'delayload'});
+ my $user_name = get_user_name();
+ my $data;
+
+ $cookie_index ||= $self->{'file'} || return;
+ if ($cookie_index =~ /[\\\/][^\\\/]+$/)
+ {
+ $cookie_dir = $` . "\\";
+ }
+
+ local(*INDEX, $_);
+
+ open(INDEX, $cookie_index) || return;
+ binmode(INDEX);
+ if (256 != read(INDEX, $data, 256))
+ {
+ warn "$cookie_index file is not large enough";
+ close(INDEX);
+ return;
+ }
+
+ # Cookies' index.dat file starts with 32 bytes of signature
+ # followed by an offset to the first record, stored as a little-endian DWORD
+ my ($sig, $size) = unpack('a32 V', $data);
+
+ if (($sig !~ /^Client UrlCache MMF Ver 5\.2/) || # check that sig is valid (only tested in IE6.0)
+ (0x4000 != $size))
+ {
+ warn "$cookie_index ['$sig' $size] does not seem to contain cookies";
+ close(INDEX);
+ return;
+ }
+
+ if (0 == seek(INDEX, $size, 0)) # move the file ptr to start of the first record
+ {
+ close(INDEX);
+ return;
+ }
+
+ # Cookies are usually stored in 'URL ' records in two contiguous 0x80 byte sectors (256 bytes)
+ # so read in two 0x80 byte sectors and adjust if not a Cookie.
+ while (256 == read(INDEX, $data, 256))
+ {
+ # each record starts with a 4-byte signature
+ # and a count (little-endian DWORD) of 0x80 byte sectors for the record
+ ($sig, $size) = unpack('a4 V', $data);
+
+ # Cookies are found in 'URL ' records
+ if ('URL ' ne $sig)
+ {
+ # skip over uninteresting record: I've seen 'HASH' and 'LEAK' records
+ if (($sig eq 'HASH') || ($sig eq 'LEAK'))
+ {
+ # '-2' takes into account the two 0x80 byte sectors we've just read in
+ if (($size > 0) && ($size != 2))
+ {
+ if (0 == seek(INDEX, ($size-2)*0x80, 1))
+ {
+ # Seek failed. Something's wrong. Gonna stop.
+ last;
+ }
+ }
+ }
+ next;
+ }
+
+ #$REMOVE Need to check if URL records in Cookies' index.dat will
+ # ever use more than two 0x80 byte sectors
+ if ($size > 2)
+ {
+ my $more_data = ($size-2)*0x80;
+
+ if ($more_data != read(INDEX, $data, $more_data, 256))
+ {
+ last;
+ }
+ }
+
+ (my $user_name2 = $user_name) =~ s/ /_/g;
+ if ($data =~ /Cookie\:\Q$user_name\E\@([\x21-\xFF]+).*?((?:\Q$user_name\E|\Q$user_name2\E)\@[\x21-\xFF]+\.txt)/)
+ {
+ my $cookie_file = $cookie_dir . $2; # form full pathname
+
+ if (!$delay_load)
+ {
+ $self->load_cookie($cookie_file);
+ }
+ else
+ {
+ my $domain = $1;
+
+ # grab only the domain name, drop everything from the first dir sep on
+ if ($domain =~ m{[\\/]})
+ {
+ $domain = $`;
+ }
+
+ # set the delayload cookie for this domain with
+ # the cookie_file as cookie for later-loading info
+ $self->set_cookie(undef, 'cookie', $cookie_file,
+ '//+delayload', $domain, undef,
+ 0, 0, $now+86400, 0);
+ }
+ }
+ }
+
+ close(INDEX);
+
+ 1;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Cookies::Microsoft - access to Microsoft cookies files
+
+=head1 SYNOPSIS
+
+ use LWP;
+ use HTTP::Cookies::Microsoft;
+ use Win32::TieRegistry(Delimiter => "/");
+ my $cookies_dir = $Registry->
+ {"CUser/Software/Microsoft/Windows/CurrentVersion/Explorer/Shell Folders/Cookies"};
+
+ $cookie_jar = HTTP::Cookies::Microsoft->new(
+ file => "$cookies_dir\\index.dat",
+ 'delayload' => 1,
+ );
+ my $browser = LWP::UserAgent->new;
+ $browser->cookie_jar( $cookie_jar );
+
+=head1 DESCRIPTION
+
+This is a subclass of C<HTTP::Cookies> which
+loads Microsoft Internet Explorer 5.x and 6.x for Windows (MSIE)
+cookie files.
+
+See the documentation for L<HTTP::Cookies>.
+
+=head1 METHODS
+
+The following methods are provided:
+
+=over 4
+
+=item $cookie_jar = HTTP::Cookies::Microsoft->new;
+
+The constructor takes hash style parameters. In addition
+to the regular HTTP::Cookies parameters, HTTP::Cookies::Microsoft
+recognizes the following:
+
+ delayload: delay loading of cookie data until a request
+ is actually made. This results in faster
+ runtime unless you use most of the cookies
+ since only the domain's cookie data
+ is loaded on demand.
+
+=back
+
+=head1 CAVEATS
+
+Please note that the code DOESN'T support saving to the MSIE
+cookie file format.
+
+=head1 AUTHOR
+
+Johnny Lee <typo_pl@hotmail.com>
+
+=head1 COPYRIGHT
+
+Copyright 2002 Johnny Lee
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
+
--- /dev/null
+package HTTP::Cookies::Netscape;
+
+use strict;
+use vars qw(@ISA $VERSION);
+
+$VERSION = "5.832";
+
+require HTTP::Cookies;
+@ISA=qw(HTTP::Cookies);
+
+sub load
+{
+ my($self, $file) = @_;
+ $file ||= $self->{'file'} || return;
+ local(*FILE, $_);
+ local $/ = "\n"; # make sure we got standard record separator
+ my @cookies;
+ open(FILE, $file) || return;
+ my $magic = <FILE>;
+ unless ($magic =~ /^\#(?: Netscape)? HTTP Cookie File/) {
+ warn "$file does not look like a netscape cookies file" if $^W;
+ close(FILE);
+ return;
+ }
+ my $now = time() - $HTTP::Cookies::EPOCH_OFFSET;
+ while (<FILE>) {
+ next if /^\s*\#/;
+ next if /^\s*$/;
+ tr/\n\r//d;
+ my($domain,$bool1,$path,$secure, $expires,$key,$val) = split(/\t/, $_);
+ $secure = ($secure eq "TRUE");
+ $self->set_cookie(undef,$key,$val,$path,$domain,undef,
+ 0,$secure,$expires-$now, 0);
+ }
+ close(FILE);
+ 1;
+}
+
+sub save
+{
+ my($self, $file) = @_;
+ $file ||= $self->{'file'} || return;
+ local(*FILE, $_);
+ open(FILE, ">$file") || return;
+
+ # Use old, now broken link to the old cookie spec just in case something
+ # else (not us!) requires the comment block exactly this way.
+ print FILE <<EOT;
+# Netscape HTTP Cookie File
+# http://www.netscape.com/newsref/std/cookie_spec.html
+# This is a generated file! Do not edit.
+
+EOT
+
+ my $now = time - $HTTP::Cookies::EPOCH_OFFSET;
+ $self->scan(sub {
+ my($version,$key,$val,$path,$domain,$port,
+ $path_spec,$secure,$expires,$discard,$rest) = @_;
+ return if $discard && !$self->{ignore_discard};
+ $expires = $expires ? $expires - $HTTP::Cookies::EPOCH_OFFSET : 0;
+ return if $now > $expires;
+ $secure = $secure ? "TRUE" : "FALSE";
+ my $bool = $domain =~ /^\./ ? "TRUE" : "FALSE";
+ print FILE join("\t", $domain, $bool, $path, $secure, $expires, $key, $val), "\n";
+ });
+ close(FILE);
+ 1;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+HTTP::Cookies::Netscape - access to Netscape cookies files
+
+=head1 SYNOPSIS
+
+ use LWP;
+ use HTTP::Cookies::Netscape;
+ $cookie_jar = HTTP::Cookies::Netscape->new(
+ file => "c:/program files/netscape/users/ZombieCharity/cookies.txt",
+ );
+ my $browser = LWP::UserAgent->new;
+ $browser->cookie_jar( $cookie_jar );
+
+=head1 DESCRIPTION
+
+This is a subclass of C<HTTP::Cookies> that reads (and optionally
+writes) Netscape/Mozilla cookie files.
+
+See the documentation for L<HTTP::Cookies>.
+
+=head1 CAVEATS
+
+Please note that the Netscape/Mozilla cookie file format can't store
+all the information available in the Set-Cookie2 headers, so you will
+probably lose some information if you save in this format.
+
+At time of writing, this module seems to work fine with Mozilla
+Phoenix/Firebird.
+
+=head1 SEE ALSO
+
+L<HTTP::Cookies::Microsoft>
+
+=head1 COPYRIGHT
+
+Copyright 2002-2003 Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
--- /dev/null
+package HTTP::Daemon;
+
+use strict;
+use vars qw($VERSION @ISA $PROTO $DEBUG);
+
+$VERSION = "5.827";
+
+use IO::Socket qw(AF_INET INADDR_ANY inet_ntoa);
+@ISA=qw(IO::Socket::INET);
+
+$PROTO = "HTTP/1.1";
+
+
+sub new
+{
+ my($class, %args) = @_;
+ $args{Listen} ||= 5;
+ $args{Proto} ||= 'tcp';
+ return $class->SUPER::new(%args);
+}
+
+
+sub accept
+{
+ my $self = shift;
+ my $pkg = shift || "HTTP::Daemon::ClientConn";
+ my ($sock, $peer) = $self->SUPER::accept($pkg);
+ if ($sock) {
+ ${*$sock}{'httpd_daemon'} = $self;
+ return wantarray ? ($sock, $peer) : $sock;
+ }
+ else {
+ return;
+ }
+}
+
+
+sub url
+{
+ my $self = shift;
+ my $url = $self->_default_scheme . "://";
+ my $addr = $self->sockaddr;
+ if (!$addr || $addr eq INADDR_ANY) {
+ require Sys::Hostname;
+ $url .= lc Sys::Hostname::hostname();
+ }
+ else {
+ $url .= gethostbyaddr($addr, AF_INET) || inet_ntoa($addr);
+ }
+ my $port = $self->sockport;
+ $url .= ":$port" if $port != $self->_default_port;
+ $url .= "/";
+ $url;
+}
+
+
+sub _default_port {
+ 80;
+}
+
+
+sub _default_scheme {
+ "http";
+}
+
+
+sub product_tokens
+{
+ "libwww-perl-daemon/$HTTP::Daemon::VERSION";
+}
+
+
+
+package HTTP::Daemon::ClientConn;
+
+use vars qw(@ISA $DEBUG);
+use IO::Socket ();
+@ISA=qw(IO::Socket::INET);
+*DEBUG = \$HTTP::Daemon::DEBUG;
+
+use HTTP::Request ();
+use HTTP::Response ();
+use HTTP::Status;
+use HTTP::Date qw(time2str);
+use LWP::MediaTypes qw(guess_media_type);
+use Carp ();
+
+my $CRLF = "\015\012"; # "\r\n" is not portable
+my $HTTP_1_0 = _http_version("HTTP/1.0");
+my $HTTP_1_1 = _http_version("HTTP/1.1");
+
+
+sub get_request
+{
+ my($self, $only_headers) = @_;
+ if (${*$self}{'httpd_nomore'}) {
+ $self->reason("No more requests from this connection");
+ return;
+ }
+
+ $self->reason("");
+ my $buf = ${*$self}{'httpd_rbuf'};
+ $buf = "" unless defined $buf;
+
+ my $timeout = $ {*$self}{'io_socket_timeout'};
+ my $fdset = "";
+ vec($fdset, $self->fileno, 1) = 1;
+ local($_);
+
+ READ_HEADER:
+ while (1) {
+ # loop until we have the whole header in $buf
+ $buf =~ s/^(?:\015?\012)+//; # ignore leading blank lines
+ if ($buf =~ /\012/) { # potential, has at least one line
+ if ($buf =~ /^\w+[^\012]+HTTP\/\d+\.\d+\015?\012/) {
+ if ($buf =~ /\015?\012\015?\012/) {
+ last READ_HEADER; # we have it
+ }
+ elsif (length($buf) > 16*1024) {
+ $self->send_error(413); # REQUEST_ENTITY_TOO_LARGE
+ $self->reason("Very long header");
+ return;
+ }
+ }
+ else {
+ last READ_HEADER; # HTTP/0.9 client
+ }
+ }
+ elsif (length($buf) > 16*1024) {
+ $self->send_error(414); # REQUEST_URI_TOO_LARGE
+ $self->reason("Very long first line");
+ return;
+ }
+ print STDERR "Need more data for complete header\n" if $DEBUG;
+ return unless $self->_need_more($buf, $timeout, $fdset);
+ }
+ if ($buf !~ s/^(\S+)[ \t]+(\S+)(?:[ \t]+(HTTP\/\d+\.\d+))?[^\012]*\012//) {
+ ${*$self}{'httpd_client_proto'} = _http_version("HTTP/1.0");
+ $self->send_error(400); # BAD_REQUEST
+ $self->reason("Bad request line: $buf");
+ return;
+ }
+ my $method = $1;
+ my $uri = $2;
+ my $proto = $3 || "HTTP/0.9";
+ $uri = "http://$uri" if $method eq "CONNECT";
+ $uri = $HTTP::URI_CLASS->new($uri, $self->daemon->url);
+ my $r = HTTP::Request->new($method, $uri);
+ $r->protocol($proto);
+ ${*$self}{'httpd_client_proto'} = $proto = _http_version($proto);
+ ${*$self}{'httpd_head'} = ($method eq "HEAD");
+
+ if ($proto >= $HTTP_1_0) {
+ # we expect to find some headers
+ my($key, $val);
+ HEADER:
+ while ($buf =~ s/^([^\012]*)\012//) {
+ $_ = $1;
+ s/\015$//;
+ if (/^([^:\s]+)\s*:\s*(.*)/) {
+ $r->push_header($key, $val) if $key;
+ ($key, $val) = ($1, $2);
+ }
+ elsif (/^\s+(.*)/) {
+ $val .= " $1";
+ }
+ else {
+ last HEADER;
+ }
+ }
+ $r->push_header($key, $val) if $key;
+ }
+
+ my $conn = $r->header('Connection');
+ if ($proto >= $HTTP_1_1) {
+ ${*$self}{'httpd_nomore'}++ if $conn && lc($conn) =~ /\bclose\b/;
+ }
+ else {
+ ${*$self}{'httpd_nomore'}++ unless $conn &&
+ lc($conn) =~ /\bkeep-alive\b/;
+ }
+
+ if ($only_headers) {
+ ${*$self}{'httpd_rbuf'} = $buf;
+ return $r;
+ }
+
+ # Find out how much content to read
+ my $te = $r->header('Transfer-Encoding');
+ my $ct = $r->header('Content-Type');
+ my $len = $r->header('Content-Length');
+
+ # Act on the Expect header, if it's there
+ for my $e ( $r->header('Expect') ) {
+ if( lc($e) eq '100-continue' ) {
+ $self->send_status_line(100);
+ $self->send_crlf;
+ }
+ else {
+ $self->send_error(417);
+ $self->reason("Unsupported Expect header value");
+ return;
+ }
+ }
+
+ if ($te && lc($te) eq 'chunked') {
+ # Handle chunked transfer encoding
+ my $body = "";
+ CHUNK:
+ while (1) {
+ print STDERR "Chunked\n" if $DEBUG;
+ if ($buf =~ s/^([^\012]*)\012//) {
+ my $chunk_head = $1;
+ unless ($chunk_head =~ /^([0-9A-Fa-f]+)/) {
+ $self->send_error(400);
+ $self->reason("Bad chunk header $chunk_head");
+ return;
+ }
+ my $size = hex($1);
+ last CHUNK if $size == 0;
+
+ my $missing = $size - length($buf) + 2; # 2=CRLF at chunk end
+ # must read until we have a complete chunk
+ while ($missing > 0) {
+ print STDERR "Need $missing more bytes\n" if $DEBUG;
+ my $n = $self->_need_more($buf, $timeout, $fdset);
+ return unless $n;
+ $missing -= $n;
+ }
+ $body .= substr($buf, 0, $size);
+ substr($buf, 0, $size+2) = '';
+
+ }
+ else {
+ # need more data in order to have a complete chunk header
+ return unless $self->_need_more($buf, $timeout, $fdset);
+ }
+ }
+ $r->content($body);
+
+ # pretend it was a normal entity body
+ $r->remove_header('Transfer-Encoding');
+ $r->header('Content-Length', length($body));
+
+ my($key, $val);
+ FOOTER:
+ while (1) {
+ if ($buf !~ /\012/) {
+ # need at least one line to look at
+ return unless $self->_need_more($buf, $timeout, $fdset);
+ }
+ else {
+ $buf =~ s/^([^\012]*)\012//;
+ $_ = $1;
+ s/\015$//;
+ if (/^([\w\-]+)\s*:\s*(.*)/) {
+ $r->push_header($key, $val) if $key;
+ ($key, $val) = ($1, $2);
+ }
+ elsif (/^\s+(.*)/) {
+ $val .= " $1";
+ }
+ elsif (!length) {
+ last FOOTER;
+ }
+ else {
+ $self->reason("Bad footer syntax");
+ return;
+ }
+ }
+ }
+ $r->push_header($key, $val) if $key;
+
+ }
+ elsif ($te) {
+ $self->send_error(501); # Unknown transfer encoding
+ $self->reason("Unknown transfer encoding '$te'");
+ return;
+
+ }
+ elsif ($len) {
+ # Plain body specified by "Content-Length"
+ my $missing = $len - length($buf);
+ while ($missing > 0) {
+ print "Need $missing more bytes of content\n" if $DEBUG;
+ my $n = $self->_need_more($buf, $timeout, $fdset);
+ return unless $n;
+ $missing -= $n;
+ }
+ if (length($buf) > $len) {
+ $r->content(substr($buf,0,$len));
+ substr($buf, 0, $len) = '';
+ }
+ else {
+ $r->content($buf);
+ $buf='';
+ }
+ }
+ elsif ($ct && $ct =~ m/^multipart\/\w+\s*;.*boundary\s*=\s*("?)(\w+)\1/i) {
+ # Handle multipart content type
+ my $boundary = "$CRLF--$2--";
+ my $index;
+ while (1) {
+ $index = index($buf, $boundary);
+ last if $index >= 0;
+ # end marker not yet found
+ return unless $self->_need_more($buf, $timeout, $fdset);
+ }
+ $index += length($boundary);
+ $r->content(substr($buf, 0, $index));
+ substr($buf, 0, $index) = '';
+
+ }
+ ${*$self}{'httpd_rbuf'} = $buf;
+
+ $r;
+}
+
+
+sub _need_more
+{
+ my $self = shift;
+ #my($buf,$timeout,$fdset) = @_;
+ if ($_[1]) {
+ my($timeout, $fdset) = @_[1,2];
+ print STDERR "select(,,,$timeout)\n" if $DEBUG;
+ my $n = select($fdset,undef,undef,$timeout);
+ unless ($n) {
+ $self->reason(defined($n) ? "Timeout" : "select: $!");
+ return;
+ }
+ }
+ print STDERR "sysread()\n" if $DEBUG;
+ my $n = sysread($self, $_[0], 2048, length($_[0]));
+ $self->reason(defined($n) ? "Client closed" : "sysread: $!") unless $n;
+ $n;
+}
+
+
+sub read_buffer
+{
+ my $self = shift;
+ my $old = ${*$self}{'httpd_rbuf'};
+ if (@_) {
+ ${*$self}{'httpd_rbuf'} = shift;
+ }
+ $old;
+}
+
+
+sub reason
+{
+ my $self = shift;
+ my $old = ${*$self}{'httpd_reason'};
+ if (@_) {
+ ${*$self}{'httpd_reason'} = shift;
+ }
+ $old;
+}
+
+
+sub proto_ge
+{
+ my $self = shift;
+ ${*$self}{'httpd_client_proto'} >= _http_version(shift);
+}
+
+
+sub _http_version
+{
+ local($_) = shift;
+ return 0 unless m,^(?:HTTP/)?(\d+)\.(\d+)$,i;
+ $1 * 1000 + $2;
+}
+
+
+sub antique_client
+{
+ my $self = shift;
+ ${*$self}{'httpd_client_proto'} < $HTTP_1_0;
+}
+
+
+sub force_last_request
+{
+ my $self = shift;
+ ${*$self}{'httpd_nomore'}++;
+}
+
+sub head_request
+{
+ my $self = shift;
+ ${*$self}{'httpd_head'};
+}
+
+
+sub send_status_line
+{
+ my($self, $status, $message, $proto) = @_;
+ return if $self->antique_client;
+ $status ||= RC_OK;
+ $message ||= status_message($status) || "";
+ $proto ||= $HTTP::Daemon::PROTO || "HTTP/1.1";
+ print $self "$proto $status $message$CRLF";
+}
+
+
+sub send_crlf
+{
+ my $self = shift;
+ print $self $CRLF;
+}
+
+
+sub send_basic_header
+{
+ my $self = shift;
+ return if $self->antique_client;
+ $self->send_status_line(@_);
+ print $self "Date: ", time2str(time), $CRLF;
+ my $product = $self->daemon->product_tokens;
+ print $self "Server: $product$CRLF" if $product;
+}
+
+
+sub send_header
+{
+ my $self = shift;
+ while (@_) {
+ my($k, $v) = splice(@_, 0, 2);
+ $v = "" unless defined($v);
+ print $self "$k: $v$CRLF";
+ }
+}
+
+
+sub send_response
+{
+ my $self = shift;
+ my $res = shift;
+ if (!ref $res) {
+ $res ||= RC_OK;
+ $res = HTTP::Response->new($res, @_);
+ }
+ my $content = $res->content;
+ my $chunked;
+ unless ($self->antique_client) {
+ my $code = $res->code;
+ $self->send_basic_header($code, $res->message, $res->protocol);
+ if ($code =~ /^(1\d\d|[23]04)$/) {
+ # make sure content is empty
+ $res->remove_header("Content-Length");
+ $content = "";
+ }
+ elsif ($res->request && $res->request->method eq "HEAD") {
+ # probably OK
+ }
+ elsif (ref($content) eq "CODE") {
+ if ($self->proto_ge("HTTP/1.1")) {
+ $res->push_header("Transfer-Encoding" => "chunked");
+ $chunked++;
+ }
+ else {
+ $self->force_last_request;
+ }
+ }
+ elsif (length($content)) {
+ $res->header("Content-Length" => length($content));
+ }
+ else {
+ $self->force_last_request;
+ $res->header('connection','close');
+ }
+ print $self $res->headers_as_string($CRLF);
+ print $self $CRLF; # separates headers and content
+ }
+ if ($self->head_request) {
+ # no content
+ }
+ elsif (ref($content) eq "CODE") {
+ while (1) {
+ my $chunk = &$content();
+ last unless defined($chunk) && length($chunk);
+ if ($chunked) {
+ printf $self "%x%s%s%s", length($chunk), $CRLF, $chunk, $CRLF;
+ }
+ else {
+ print $self $chunk;
+ }
+ }
+ print $self "0$CRLF$CRLF" if $chunked; # no trailers either
+ }
+ elsif (length $content) {
+ print $self $content;
+ }
+}
+
+
+sub send_redirect
+{
+ my($self, $loc, $status, $content) = @_;
+ $status ||= RC_MOVED_PERMANENTLY;
+ Carp::croak("Status '$status' is not redirect") unless is_redirect($status);
+ $self->send_basic_header($status);
+ my $base = $self->daemon->url;
+ $loc = $HTTP::URI_CLASS->new($loc, $base) unless ref($loc);
+ $loc = $loc->abs($base);
+ print $self "Location: $loc$CRLF";
+ if ($content) {
+ my $ct = $content =~ /^\s*</ ? "text/html" : "text/plain";
+ print $self "Content-Type: $ct$CRLF";
+ }
+ print $self $CRLF;
+ print $self $content if $content && !$self->head_request;
+ $self->force_last_request; # no use keeping the connection open
+}
+
+
+sub send_error
+{
+ my($self, $status, $error) = @_;
+ $status ||= RC_BAD_REQUEST;
+ Carp::croak("Status '$status' is not an error") unless is_error($status);
+ my $mess = status_message($status);
+ $error ||= "";
+ $mess = <<EOT;
+<title>$status $mess</title>
+<h1>$status $mess</h1>
+$error
+EOT
+ unless ($self->antique_client) {
+ $self->send_basic_header($status);
+ print $self "Content-Type: text/html$CRLF";
+ print $self "Content-Length: " . length($mess) . $CRLF;
+ print $self $CRLF;
+ }
+ print $self $mess unless $self->head_request;
+ $status;
+}
+
+
+sub send_file_response
+{
+ my($self, $file) = @_;
+ if (-d $file) {
+ $self->send_dir($file);
+ }
+ elsif (-f _) {
+ # plain file
+ local(*F);
+ sysopen(F, $file, 0) or
+ return $self->send_error(RC_FORBIDDEN);
+ binmode(F);
+ my($ct,$ce) = guess_media_type($file);
+ my($size,$mtime) = (stat _)[7,9];
+ unless ($self->antique_client) {
+ $self->send_basic_header;
+ print $self "Content-Type: $ct$CRLF";
+ print $self "Content-Encoding: $ce$CRLF" if $ce;
+ print $self "Content-Length: $size$CRLF" if $size;
+ print $self "Last-Modified: ", time2str($mtime), "$CRLF" if $mtime;
+ print $self $CRLF;
+ }
+ $self->send_file(\*F) unless $self->head_request;
+ return RC_OK;
+ }
+ else {
+ $self->send_error(RC_NOT_FOUND);
+ }
+}
+
+
+sub send_dir
+{
+ my($self, $dir) = @_;
+ $self->send_error(RC_NOT_FOUND) unless -d $dir;
+ $self->send_error(RC_NOT_IMPLEMENTED);
+}
+
+
+sub send_file
+{
+ my($self, $file) = @_;
+ my $opened = 0;
+ local(*FILE);
+ if (!ref($file)) {
+ open(FILE, $file) || return undef;
+ binmode(FILE);
+ $file = \*FILE;
+ $opened++;
+ }
+ my $cnt = 0;
+ my $buf = "";
+ my $n;
+ while ($n = sysread($file, $buf, 8*1024)) {
+ last if !$n;
+ $cnt += $n;
+ print $self $buf;
+ }
+ close($file) if $opened;
+ $cnt;
+}
+
+
+sub daemon
+{
+ my $self = shift;
+ ${*$self}{'httpd_daemon'};
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Daemon - a simple http server class
+
+=head1 SYNOPSIS
+
+ use HTTP::Daemon;
+ use HTTP::Status;
+
+ my $d = HTTP::Daemon->new || die;
+ print "Please contact me at: <URL:", $d->url, ">\n";
+ while (my $c = $d->accept) {
+ while (my $r = $c->get_request) {
+ if ($r->method eq 'GET' and $r->uri->path eq "/xyzzy") {
+ # remember, this is *not* recommended practice :-)
+ $c->send_file_response("/etc/passwd");
+ }
+ else {
+ $c->send_error(RC_FORBIDDEN)
+ }
+ }
+ $c->close;
+ undef($c);
+ }
+
+=head1 DESCRIPTION
+
+Instances of the C<HTTP::Daemon> class are HTTP/1.1 servers that
+listen on a socket for incoming requests. The C<HTTP::Daemon> is a
+subclass of C<IO::Socket::INET>, so you can perform socket operations
+directly on it too.
+
+The accept() method will return when a connection from a client is
+available. The returned value will be an C<HTTP::Daemon::ClientConn>
+object which is another C<IO::Socket::INET> subclass. Calling the
+get_request() method on this object will read data from the client and
+return an C<HTTP::Request> object. The ClientConn object also provide
+methods to send back various responses.
+
+This HTTP daemon does not fork(2) for you. Your application, i.e. the
+user of the C<HTTP::Daemon> is responsible for forking if that is
+desirable. Also note that the user is responsible for generating
+responses that conform to the HTTP/1.1 protocol.
+
+The following methods of C<HTTP::Daemon> are new (or enhanced) relative
+to the C<IO::Socket::INET> base class:
+
+=over 4
+
+=item $d = HTTP::Daemon->new
+
+=item $d = HTTP::Daemon->new( %opts )
+
+The constructor method takes the same arguments as the
+C<IO::Socket::INET> constructor, but unlike its base class it can also
+be called without any arguments. The daemon will then set up a listen
+queue of 5 connections and allocate some random port number.
+
+A server that wants to bind to some specific address on the standard
+HTTP port will be constructed like this:
+
+ $d = HTTP::Daemon->new(
+ LocalAddr => 'www.thisplace.com',
+ LocalPort => 80,
+ );
+
+See L<IO::Socket::INET> for a description of other arguments that can
+be used configure the daemon during construction.
+
+=item $c = $d->accept
+
+=item $c = $d->accept( $pkg )
+
+=item ($c, $peer_addr) = $d->accept
+
+This method works the same the one provided by the base class, but it
+returns an C<HTTP::Daemon::ClientConn> reference by default. If a
+package name is provided as argument, then the returned object will be
+blessed into the given class. It is probably a good idea to make that
+class a subclass of C<HTTP::Daemon::ClientConn>.
+
+The accept method will return C<undef> if timeouts have been enabled
+and no connection is made within the given time. The timeout() method
+is described in L<IO::Socket>.
+
+In list context both the client object and the peer address will be
+returned; see the description of the accept method L<IO::Socket> for
+details.
+
+=item $d->url
+
+Returns a URL string that can be used to access the server root.
+
+=item $d->product_tokens
+
+Returns the name that this server will use to identify itself. This
+is the string that is sent with the C<Server> response header. The
+main reason to have this method is that subclasses can override it if
+they want to use another product name.
+
+The default is the string "libwww-perl-daemon/#.##" where "#.##" is
+replaced with the version number of this module.
+
+=back
+
+The C<HTTP::Daemon::ClientConn> is a C<IO::Socket::INET>
+subclass. Instances of this class are returned by the accept() method
+of C<HTTP::Daemon>. The following methods are provided:
+
+=over 4
+
+=item $c->get_request
+
+=item $c->get_request( $headers_only )
+
+This method reads data from the client and turns it into an
+C<HTTP::Request> object which is returned. It returns C<undef>
+if reading fails. If it fails, then the C<HTTP::Daemon::ClientConn>
+object ($c) should be discarded, and you should not try call this
+method again on it. The $c->reason method might give you some
+information about why $c->get_request failed.
+
+The get_request() method will normally not return until the whole
+request has been received from the client. This might not be what you
+want if the request is an upload of a large file (and with chunked
+transfer encoding HTTP can even support infinite request messages -
+uploading live audio for instance). If you pass a TRUE value as the
+$headers_only argument, then get_request() will return immediately
+after parsing the request headers and you are responsible for reading
+the rest of the request content. If you are going to call
+$c->get_request again on the same connection you better read the
+correct number of bytes.
+
+=item $c->read_buffer
+
+=item $c->read_buffer( $new_value )
+
+Bytes read by $c->get_request, but not used are placed in the I<read
+buffer>. The next time $c->get_request is called it will consume the
+bytes in this buffer before reading more data from the network
+connection itself. The read buffer is invalid after $c->get_request
+has failed.
+
+If you handle the reading of the request content yourself you need to
+empty this buffer before you read more and you need to place
+unconsumed bytes here. You also need this buffer if you implement
+services like I<101 Switching Protocols>.
+
+This method always returns the old buffer content and can optionally
+replace the buffer content if you pass it an argument.
+
+=item $c->reason
+
+When $c->get_request returns C<undef> you can obtain a short string
+describing why it happened by calling $c->reason.
+
+=item $c->proto_ge( $proto )
+
+Return TRUE if the client announced a protocol with version number
+greater or equal to the given argument. The $proto argument can be a
+string like "HTTP/1.1" or just "1.1".
+
+=item $c->antique_client
+
+Return TRUE if the client speaks the HTTP/0.9 protocol. No status
+code and no headers should be returned to such a client. This should
+be the same as !$c->proto_ge("HTTP/1.0").
+
+=item $c->head_request
+
+Return TRUE if the last request was a C<HEAD> request. No content
+body must be generated for these requests.
+
+=item $c->force_last_request
+
+Make sure that $c->get_request will not try to read more requests off
+this connection. If you generate a response that is not self
+delimiting, then you should signal this fact by calling this method.
+
+This attribute is turned on automatically if the client announces
+protocol HTTP/1.0 or worse and does not include a "Connection:
+Keep-Alive" header. It is also turned on automatically when HTTP/1.1
+or better clients send the "Connection: close" request header.
+
+=item $c->send_status_line
+
+=item $c->send_status_line( $code )
+
+=item $c->send_status_line( $code, $mess )
+
+=item $c->send_status_line( $code, $mess, $proto )
+
+Send the status line back to the client. If $code is omitted 200 is
+assumed. If $mess is omitted, then a message corresponding to $code
+is inserted. If $proto is missing the content of the
+$HTTP::Daemon::PROTO variable is used.
+
+=item $c->send_crlf
+
+Send the CRLF sequence to the client.
+
+=item $c->send_basic_header
+
+=item $c->send_basic_header( $code )
+
+=item $c->send_basic_header( $code, $mess )
+
+=item $c->send_basic_header( $code, $mess, $proto )
+
+Send the status line and the "Date:" and "Server:" headers back to
+the client. This header is assumed to be continued and does not end
+with an empty CRLF line.
+
+See the description of send_status_line() for the description of the
+accepted arguments.
+
+=item $c->send_header( $field, $value )
+
+=item $c->send_header( $field1, $value1, $field2, $value2, ... )
+
+Send one or more header lines.
+
+=item $c->send_response( $res )
+
+Write a C<HTTP::Response> object to the
+client as a response. We try hard to make sure that the response is
+self delimiting so that the connection can stay persistent for further
+request/response exchanges.
+
+The content attribute of the C<HTTP::Response> object can be a normal
+string or a subroutine reference. If it is a subroutine, then
+whatever this callback routine returns is written back to the
+client as the response content. The routine will be called until it
+return an undefined or empty value. If the client is HTTP/1.1 aware
+then we will use chunked transfer encoding for the response.
+
+=item $c->send_redirect( $loc )
+
+=item $c->send_redirect( $loc, $code )
+
+=item $c->send_redirect( $loc, $code, $entity_body )
+
+Send a redirect response back to the client. The location ($loc) can
+be an absolute or relative URL. The $code must be one the redirect
+status codes, and defaults to "301 Moved Permanently"
+
+=item $c->send_error
+
+=item $c->send_error( $code )
+
+=item $c->send_error( $code, $error_message )
+
+Send an error response back to the client. If the $code is missing a
+"Bad Request" error is reported. The $error_message is a string that
+is incorporated in the body of the HTML entity body.
+
+=item $c->send_file_response( $filename )
+
+Send back a response with the specified $filename as content. If the
+file is a directory we try to generate an HTML index of it.
+
+=item $c->send_file( $filename )
+
+=item $c->send_file( $fd )
+
+Copy the file to the client. The file can be a string (which
+will be interpreted as a filename) or a reference to an C<IO::Handle>
+or glob.
+
+=item $c->daemon
+
+Return a reference to the corresponding C<HTTP::Daemon> object.
+
+=back
+
+=head1 SEE ALSO
+
+RFC 2616
+
+L<IO::Socket::INET>, L<IO::Socket>
+
+=head1 COPYRIGHT
+
+Copyright 1996-2003, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Date;
+
+$VERSION = "5.831";
+
+require 5.004;
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(time2str str2time);
+@EXPORT_OK = qw(parse_date time2iso time2isoz);
+
+use strict;
+require Time::Local;
+
+use vars qw(@DoW @MoY %MoY);
+@DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
+@MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
+@MoY{@MoY} = (1..12);
+
+my %GMT_ZONE = (GMT => 1, UTC => 1, UT => 1, Z => 1);
+
+
+sub time2str (;$)
+{
+ my $time = shift;
+ $time = time unless defined $time;
+ my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($time);
+ sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT",
+ $DoW[$wday],
+ $mday, $MoY[$mon], $year+1900,
+ $hour, $min, $sec);
+}
+
+
+sub str2time ($;$)
+{
+ my $str = shift;
+ return undef unless defined $str;
+
+ # fast exit for strictly conforming string
+ if ($str =~ /^[SMTWF][a-z][a-z], (\d\d) ([JFMAJSOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$/) {
+ return eval {
+ my $t = Time::Local::timegm($6, $5, $4, $1, $MoY{$2}-1, $3);
+ $t < 0 ? undef : $t;
+ };
+ }
+
+ my @d = parse_date($str);
+ return undef unless @d;
+ $d[1]--; # month
+
+ my $tz = pop(@d);
+ unless (defined $tz) {
+ unless (defined($tz = shift)) {
+ return eval { my $frac = $d[-1]; $frac -= ($d[-1] = int($frac));
+ my $t = Time::Local::timelocal(reverse @d) + $frac;
+ $t < 0 ? undef : $t;
+ };
+ }
+ }
+
+ my $offset = 0;
+ if ($GMT_ZONE{uc $tz}) {
+ # offset already zero
+ }
+ elsif ($tz =~ /^([-+])?(\d\d?):?(\d\d)?$/) {
+ $offset = 3600 * $2;
+ $offset += 60 * $3 if $3;
+ $offset *= -1 if $1 && $1 eq '-';
+ }
+ else {
+ eval { require Time::Zone } || return undef;
+ $offset = Time::Zone::tz_offset($tz);
+ return undef unless defined $offset;
+ }
+
+ return eval { my $frac = $d[-1]; $frac -= ($d[-1] = int($frac));
+ my $t = Time::Local::timegm(reverse @d) + $frac;
+ $t < 0 ? undef : $t - $offset;
+ };
+}
+
+
+sub parse_date ($)
+{
+ local($_) = shift;
+ return unless defined;
+
+ # More lax parsing below
+ s/^\s+//; # kill leading space
+ s/^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*//i; # Useless weekday
+
+ my($day, $mon, $yr, $hr, $min, $sec, $tz, $ampm);
+
+ # Then we are able to check for most of the formats with this regexp
+ (($day,$mon,$yr,$hr,$min,$sec,$tz) =
+ /^
+ (\d\d?) # day
+ (?:\s+|[-\/])
+ (\w+) # month
+ (?:\s+|[-\/])
+ (\d+) # year
+ (?:
+ (?:\s+|:) # separator before clock
+ (\d\d?):(\d\d) # hour:min
+ (?::(\d\d))? # optional seconds
+ )? # optional clock
+ \s*
+ ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
+ \s*
+ (?:\(\w+\))? # ASCII representation of timezone in parens.
+ \s*$
+ /x)
+
+ ||
+
+ # Try the ctime and asctime format
+ (($mon, $day, $hr, $min, $sec, $tz, $yr) =
+ /^
+ (\w{1,3}) # month
+ \s+
+ (\d\d?) # day
+ \s+
+ (\d\d?):(\d\d) # hour:min
+ (?::(\d\d))? # optional seconds
+ \s+
+ (?:([A-Za-z]+)\s+)? # optional timezone
+ (\d+) # year
+ \s*$ # allow trailing whitespace
+ /x)
+
+ ||
+
+ # Then the Unix 'ls -l' date format
+ (($mon, $day, $yr, $hr, $min, $sec) =
+ /^
+ (\w{3}) # month
+ \s+
+ (\d\d?) # day
+ \s+
+ (?:
+ (\d\d\d\d) | # year
+ (\d{1,2}):(\d{2}) # hour:min
+ (?::(\d\d))? # optional seconds
+ )
+ \s*$
+ /x)
+
+ ||
+
+ # ISO 8601 format '1996-02-29 12:00:00 -0100' and variants
+ (($yr, $mon, $day, $hr, $min, $sec, $tz) =
+ /^
+ (\d{4}) # year
+ [-\/]?
+ (\d\d?) # numerical month
+ [-\/]?
+ (\d\d?) # day
+ (?:
+ (?:\s+|[-:Tt]) # separator before clock
+ (\d\d?):?(\d\d) # hour:min
+ (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
+ )? # optional clock
+ \s*
+ ([-+]?\d\d?:?(:?\d\d)?
+ |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
+ \s*$
+ /x)
+
+ ||
+
+ # Windows 'dir' 11-12-96 03:52PM
+ (($mon, $day, $yr, $hr, $min, $ampm) =
+ /^
+ (\d{2}) # numerical month
+ -
+ (\d{2}) # day
+ -
+ (\d{2}) # year
+ \s+
+ (\d\d?):(\d\d)([APap][Mm]) # hour:min AM or PM
+ \s*$
+ /x)
+
+ ||
+ return; # unrecognized format
+
+ # Translate month name to number
+ $mon = $MoY{$mon} ||
+ $MoY{"\u\L$mon"} ||
+ ($mon =~ /^\d\d?$/ && $mon >= 1 && $mon <= 12 && int($mon)) ||
+ return;
+
+ # If the year is missing, we assume first date before the current,
+ # because of the formats we support such dates are mostly present
+ # on "ls -l" listings.
+ unless (defined $yr) {
+ my $cur_mon;
+ ($cur_mon, $yr) = (localtime)[4, 5];
+ $yr += 1900;
+ $cur_mon++;
+ $yr-- if $mon > $cur_mon;
+ }
+ elsif (length($yr) < 3) {
+ # Find "obvious" year
+ my $cur_yr = (localtime)[5] + 1900;
+ my $m = $cur_yr % 100;
+ my $tmp = $yr;
+ $yr += $cur_yr - $m;
+ $m -= $tmp;
+ $yr += ($m > 0) ? 100 : -100
+ if abs($m) > 50;
+ }
+
+ # Make sure clock elements are defined
+ $hr = 0 unless defined($hr);
+ $min = 0 unless defined($min);
+ $sec = 0 unless defined($sec);
+
+ # Compensate for AM/PM
+ if ($ampm) {
+ $ampm = uc $ampm;
+ $hr = 0 if $hr == 12 && $ampm eq 'AM';
+ $hr += 12 if $ampm eq 'PM' && $hr != 12;
+ }
+
+ return($yr, $mon, $day, $hr, $min, $sec, $tz)
+ if wantarray;
+
+ if (defined $tz) {
+ $tz = "Z" if $tz =~ /^(GMT|UTC?|[-+]?0+)$/;
+ }
+ else {
+ $tz = "";
+ }
+ return sprintf("%04d-%02d-%02d %02d:%02d:%02d%s",
+ $yr, $mon, $day, $hr, $min, $sec, $tz);
+}
+
+
+sub time2iso (;$)
+{
+ my $time = shift;
+ $time = time unless defined $time;
+ my($sec,$min,$hour,$mday,$mon,$year) = localtime($time);
+ sprintf("%04d-%02d-%02d %02d:%02d:%02d",
+ $year+1900, $mon+1, $mday, $hour, $min, $sec);
+}
+
+
+sub time2isoz (;$)
+{
+ my $time = shift;
+ $time = time unless defined $time;
+ my($sec,$min,$hour,$mday,$mon,$year) = gmtime($time);
+ sprintf("%04d-%02d-%02d %02d:%02d:%02dZ",
+ $year+1900, $mon+1, $mday, $hour, $min, $sec);
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+HTTP::Date - date conversion routines
+
+=head1 SYNOPSIS
+
+ use HTTP::Date;
+
+ $string = time2str($time); # Format as GMT ASCII time
+ $time = str2time($string); # convert ASCII date to machine time
+
+=head1 DESCRIPTION
+
+This module provides functions that deal the date formats used by the
+HTTP protocol (and then some more). Only the first two functions,
+time2str() and str2time(), are exported by default.
+
+=over 4
+
+=item time2str( [$time] )
+
+The time2str() function converts a machine time (seconds since epoch)
+to a string. If the function is called without an argument or with an
+undefined argument, it will use the current time.
+
+The string returned is in the format preferred for the HTTP protocol.
+This is a fixed length subset of the format defined by RFC 1123,
+represented in Universal Time (GMT). An example of a time stamp
+in this format is:
+
+ Sun, 06 Nov 1994 08:49:37 GMT
+
+=item str2time( $str [, $zone] )
+
+The str2time() function converts a string to machine time. It returns
+C<undef> if the format of $str is unrecognized, otherwise whatever the
+C<Time::Local> functions can make out of the parsed time. Dates
+before the system's epoch may not work on all operating systems. The
+time formats recognized are the same as for parse_date().
+
+The function also takes an optional second argument that specifies the
+default time zone to use when converting the date. This parameter is
+ignored if the zone is found in the date string itself. If this
+parameter is missing, and the date string format does not contain any
+zone specification, then the local time zone is assumed.
+
+If the zone is not "C<GMT>" or numerical (like "C<-0800>" or
+"C<+0100>"), then the C<Time::Zone> module must be installed in order
+to get the date recognized.
+
+=item parse_date( $str )
+
+This function will try to parse a date string, and then return it as a
+list of numerical values followed by a (possible undefined) time zone
+specifier; ($year, $month, $day, $hour, $min, $sec, $tz). The $year
+returned will B<not> have the number 1900 subtracted from it and the
+$month numbers start with 1.
+
+In scalar context the numbers are interpolated in a string of the
+"YYYY-MM-DD hh:mm:ss TZ"-format and returned.
+
+If the date is unrecognized, then the empty list is returned.
+
+The function is able to parse the following formats:
+
+ "Wed, 09 Feb 1994 22:23:32 GMT" -- HTTP format
+ "Thu Feb 3 17:03:55 GMT 1994" -- ctime(3) format
+ "Thu Feb 3 00:00:00 1994", -- ANSI C asctime() format
+ "Tuesday, 08-Feb-94 14:15:29 GMT" -- old rfc850 HTTP format
+ "Tuesday, 08-Feb-1994 14:15:29 GMT" -- broken rfc850 HTTP format
+
+ "03/Feb/1994:17:03:55 -0700" -- common logfile format
+ "09 Feb 1994 22:23:32 GMT" -- HTTP format (no weekday)
+ "08-Feb-94 14:15:29 GMT" -- rfc850 format (no weekday)
+ "08-Feb-1994 14:15:29 GMT" -- broken rfc850 format (no weekday)
+
+ "1994-02-03 14:15:29 -0100" -- ISO 8601 format
+ "1994-02-03 14:15:29" -- zone is optional
+ "1994-02-03" -- only date
+ "1994-02-03T14:15:29" -- Use T as separator
+ "19940203T141529Z" -- ISO 8601 compact format
+ "19940203" -- only date
+
+ "08-Feb-94" -- old rfc850 HTTP format (no weekday, no time)
+ "08-Feb-1994" -- broken rfc850 HTTP format (no weekday, no time)
+ "09 Feb 1994" -- proposed new HTTP format (no weekday, no time)
+ "03/Feb/1994" -- common logfile format (no time, no offset)
+
+ "Feb 3 1994" -- Unix 'ls -l' format
+ "Feb 3 17:03" -- Unix 'ls -l' format
+
+ "11-15-96 03:52PM" -- Windows 'dir' format
+
+The parser ignores leading and trailing whitespace. It also allow the
+seconds to be missing and the month to be numerical in most formats.
+
+If the year is missing, then we assume that the date is the first
+matching date I<before> current month. If the year is given with only
+2 digits, then parse_date() will select the century that makes the
+year closest to the current date.
+
+=item time2iso( [$time] )
+
+Same as time2str(), but returns a "YYYY-MM-DD hh:mm:ss"-formatted
+string representing time in the local time zone.
+
+=item time2isoz( [$time] )
+
+Same as time2str(), but returns a "YYYY-MM-DD hh:mm:ssZ"-formatted
+string representing Universal Time.
+
+
+=back
+
+=head1 SEE ALSO
+
+L<perlfunc/time>, L<Time::Zone>
+
+=head1 COPYRIGHT
+
+Copyright 1995-1999, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
--- /dev/null
+package HTTP::Headers;
+
+use strict;
+use Carp ();
+
+use vars qw($VERSION $TRANSLATE_UNDERSCORE);
+$VERSION = "5.835";
+
+# The $TRANSLATE_UNDERSCORE variable controls whether '_' can be used
+# as a replacement for '-' in header field names.
+$TRANSLATE_UNDERSCORE = 1 unless defined $TRANSLATE_UNDERSCORE;
+
+# "Good Practice" order of HTTP message headers:
+# - General-Headers
+# - Request-Headers
+# - Response-Headers
+# - Entity-Headers
+
+my @general_headers = qw(
+ Cache-Control Connection Date Pragma Trailer Transfer-Encoding Upgrade
+ Via Warning
+);
+
+my @request_headers = qw(
+ Accept Accept-Charset Accept-Encoding Accept-Language
+ Authorization Expect From Host
+ If-Match If-Modified-Since If-None-Match If-Range If-Unmodified-Since
+ Max-Forwards Proxy-Authorization Range Referer TE User-Agent
+);
+
+my @response_headers = qw(
+ Accept-Ranges Age ETag Location Proxy-Authenticate Retry-After Server
+ Vary WWW-Authenticate
+);
+
+my @entity_headers = qw(
+ Allow Content-Encoding Content-Language Content-Length Content-Location
+ Content-MD5 Content-Range Content-Type Expires Last-Modified
+);
+
+my %entity_header = map { lc($_) => 1 } @entity_headers;
+
+my @header_order = (
+ @general_headers,
+ @request_headers,
+ @response_headers,
+ @entity_headers,
+);
+
+# Make alternative representations of @header_order. This is used
+# for sorting and case matching.
+my %header_order;
+my %standard_case;
+
+{
+ my $i = 0;
+ for (@header_order) {
+ my $lc = lc $_;
+ $header_order{$lc} = ++$i;
+ $standard_case{$lc} = $_;
+ }
+}
+
+
+
+sub new
+{
+ my($class) = shift;
+ my $self = bless {}, $class;
+ $self->header(@_) if @_; # set up initial headers
+ $self;
+}
+
+
+sub header
+{
+ my $self = shift;
+ Carp::croak('Usage: $h->header($field, ...)') unless @_;
+ my(@old);
+ my %seen;
+ while (@_) {
+ my $field = shift;
+ my $op = @_ ? ($seen{lc($field)}++ ? 'PUSH' : 'SET') : 'GET';
+ @old = $self->_header($field, shift, $op);
+ }
+ return @old if wantarray;
+ return $old[0] if @old <= 1;
+ join(", ", @old);
+}
+
+sub clear
+{
+ my $self = shift;
+ %$self = ();
+}
+
+
+sub push_header
+{
+ my $self = shift;
+ return $self->_header(@_, 'PUSH_H') if @_ == 2;
+ while (@_) {
+ $self->_header(splice(@_, 0, 2), 'PUSH_H');
+ }
+}
+
+
+sub init_header
+{
+ Carp::croak('Usage: $h->init_header($field, $val)') if @_ != 3;
+ shift->_header(@_, 'INIT');
+}
+
+
+sub remove_header
+{
+ my($self, @fields) = @_;
+ my $field;
+ my @values;
+ foreach $field (@fields) {
+ $field =~ tr/_/-/ if $field !~ /^:/ && $TRANSLATE_UNDERSCORE;
+ my $v = delete $self->{lc $field};
+ push(@values, ref($v) eq 'ARRAY' ? @$v : $v) if defined $v;
+ }
+ return @values;
+}
+
+sub remove_content_headers
+{
+ my $self = shift;
+ unless (defined(wantarray)) {
+ # fast branch that does not create return object
+ delete @$self{grep $entity_header{$_} || /^content-/, keys %$self};
+ return;
+ }
+
+ my $c = ref($self)->new;
+ for my $f (grep $entity_header{$_} || /^content-/, keys %$self) {
+ $c->{$f} = delete $self->{$f};
+ }
+ $c;
+}
+
+
+sub _header
+{
+ my($self, $field, $val, $op) = @_;
+
+ unless ($field =~ /^:/) {
+ $field =~ tr/_/-/ if $TRANSLATE_UNDERSCORE;
+ my $old = $field;
+ $field = lc $field;
+ unless(defined $standard_case{$field}) {
+ # generate a %standard_case entry for this field
+ $old =~ s/\b(\w)/\u$1/g;
+ $standard_case{$field} = $old;
+ }
+ }
+
+ $op ||= defined($val) ? 'SET' : 'GET';
+ if ($op eq 'PUSH_H') {
+ # Like PUSH but where we don't care about the return value
+ if (exists $self->{$field}) {
+ my $h = $self->{$field};
+ if (ref($h) eq 'ARRAY') {
+ push(@$h, ref($val) eq "ARRAY" ? @$val : $val);
+ }
+ else {
+ $self->{$field} = [$h, ref($val) eq "ARRAY" ? @$val : $val]
+ }
+ return;
+ }
+ $self->{$field} = $val;
+ return;
+ }
+
+ my $h = $self->{$field};
+ my @old = ref($h) eq 'ARRAY' ? @$h : (defined($h) ? ($h) : ());
+
+ unless ($op eq 'GET' || ($op eq 'INIT' && @old)) {
+ if (defined($val)) {
+ my @new = ($op eq 'PUSH') ? @old : ();
+ if (ref($val) ne 'ARRAY') {
+ push(@new, $val);
+ }
+ else {
+ push(@new, @$val);
+ }
+ $self->{$field} = @new > 1 ? \@new : $new[0];
+ }
+ elsif ($op ne 'PUSH') {
+ delete $self->{$field};
+ }
+ }
+ @old;
+}
+
+
+sub _sorted_field_names
+{
+ my $self = shift;
+ return [ sort {
+ ($header_order{$a} || 999) <=> ($header_order{$b} || 999) ||
+ $a cmp $b
+ } keys %$self ];
+}
+
+
+sub header_field_names {
+ my $self = shift;
+ return map $standard_case{$_} || $_, @{ $self->_sorted_field_names },
+ if wantarray;
+ return keys %$self;
+}
+
+
+sub scan
+{
+ my($self, $sub) = @_;
+ my $key;
+ for $key (@{ $self->_sorted_field_names }) {
+ next if substr($key, 0, 1) eq '_';
+ my $vals = $self->{$key};
+ if (ref($vals) eq 'ARRAY') {
+ my $val;
+ for $val (@$vals) {
+ $sub->($standard_case{$key} || $key, $val);
+ }
+ }
+ else {
+ $sub->($standard_case{$key} || $key, $vals);
+ }
+ }
+}
+
+
+sub as_string
+{
+ my($self, $endl) = @_;
+ $endl = "\n" unless defined $endl;
+
+ my @result = ();
+ for my $key (@{ $self->_sorted_field_names }) {
+ next if index($key, '_') == 0;
+ my $vals = $self->{$key};
+ if ( ref($vals) eq 'ARRAY' ) {
+ for my $val (@$vals) {
+ my $field = $standard_case{$key} || $key;
+ $field =~ s/^://;
+ if ( index($val, "\n") >= 0 ) {
+ $val = _process_newline($val, $endl);
+ }
+ push @result, $field . ': ' . $val;
+ }
+ }
+ else {
+ my $field = $standard_case{$key} || $key;
+ $field =~ s/^://;
+ if ( index($vals, "\n") >= 0 ) {
+ $vals = _process_newline($vals, $endl);
+ }
+ push @result, $field . ': ' . $vals;
+ }
+ }
+
+ join($endl, @result, '');
+}
+
+sub _process_newline {
+ local $_ = shift;
+ my $endl = shift;
+ # must handle header values with embedded newlines with care
+ s/\s+$//; # trailing newlines and space must go
+ s/\n(\x0d?\n)+/\n/g; # no empty lines
+ s/\n([^\040\t])/\n $1/g; # intial space for continuation
+ s/\n/$endl/g; # substitute with requested line ending
+ $_;
+}
+
+
+
+if (eval { require Storable; 1 }) {
+ *clone = \&Storable::dclone;
+} else {
+ *clone = sub {
+ my $self = shift;
+ my $clone = HTTP::Headers->new;
+ $self->scan(sub { $clone->push_header(@_);} );
+ $clone;
+ };
+}
+
+
+sub _date_header
+{
+ require HTTP::Date;
+ my($self, $header, $time) = @_;
+ my($old) = $self->_header($header);
+ if (defined $time) {
+ $self->_header($header, HTTP::Date::time2str($time));
+ }
+ $old =~ s/;.*// if defined($old);
+ HTTP::Date::str2time($old);
+}
+
+
+sub date { shift->_date_header('Date', @_); }
+sub expires { shift->_date_header('Expires', @_); }
+sub if_modified_since { shift->_date_header('If-Modified-Since', @_); }
+sub if_unmodified_since { shift->_date_header('If-Unmodified-Since', @_); }
+sub last_modified { shift->_date_header('Last-Modified', @_); }
+
+# This is used as a private LWP extension. The Client-Date header is
+# added as a timestamp to a response when it has been received.
+sub client_date { shift->_date_header('Client-Date', @_); }
+
+# The retry_after field is dual format (can also be a expressed as
+# number of seconds from now), so we don't provide an easy way to
+# access it until we have know how both these interfaces can be
+# addressed. One possibility is to return a negative value for
+# relative seconds and a positive value for epoch based time values.
+#sub retry_after { shift->_date_header('Retry-After', @_); }
+
+sub content_type {
+ my $self = shift;
+ my $ct = $self->{'content-type'};
+ $self->{'content-type'} = shift if @_;
+ $ct = $ct->[0] if ref($ct) eq 'ARRAY';
+ return '' unless defined($ct) && length($ct);
+ my @ct = split(/;\s*/, $ct, 2);
+ for ($ct[0]) {
+ s/\s+//g;
+ $_ = lc($_);
+ }
+ wantarray ? @ct : $ct[0];
+}
+
+sub content_type_charset {
+ my $self = shift;
+ require HTTP::Headers::Util;
+ my $h = $self->{'content-type'};
+ $h = $h->[0] if ref($h);
+ $h = "" unless defined $h;
+ my @v = HTTP::Headers::Util::split_header_words($h);
+ if (@v) {
+ my($ct, undef, %ct_param) = @{$v[0]};
+ my $charset = $ct_param{charset};
+ if ($ct) {
+ $ct = lc($ct);
+ $ct =~ s/\s+//;
+ }
+ if ($charset) {
+ $charset = uc($charset);
+ $charset =~ s/^\s+//; $charset =~ s/\s+\z//;
+ undef($charset) if $charset eq "";
+ }
+ return $ct, $charset if wantarray;
+ return $charset;
+ }
+ return undef, undef if wantarray;
+ return undef;
+}
+
+sub content_is_text {
+ my $self = shift;
+ return $self->content_type =~ m,^text/,;
+}
+
+sub content_is_html {
+ my $self = shift;
+ return $self->content_type eq 'text/html' || $self->content_is_xhtml;
+}
+
+sub content_is_xhtml {
+ my $ct = shift->content_type;
+ return $ct eq "application/xhtml+xml" ||
+ $ct eq "application/vnd.wap.xhtml+xml";
+}
+
+sub content_is_xml {
+ my $ct = shift->content_type;
+ return 1 if $ct eq "text/xml";
+ return 1 if $ct eq "application/xml";
+ return 1 if $ct =~ /\+xml$/;
+ return 0;
+}
+
+sub referer {
+ my $self = shift;
+ if (@_ && $_[0] =~ /#/) {
+ # Strip fragment per RFC 2616, section 14.36.
+ my $uri = shift;
+ if (ref($uri)) {
+ $uri = $uri->clone;
+ $uri->fragment(undef);
+ }
+ else {
+ $uri =~ s/\#.*//;
+ }
+ unshift @_, $uri;
+ }
+ ($self->_header('Referer', @_))[0];
+}
+*referrer = \&referer; # on tchrist's request
+
+sub title { (shift->_header('Title', @_))[0] }
+sub content_encoding { (shift->_header('Content-Encoding', @_))[0] }
+sub content_language { (shift->_header('Content-Language', @_))[0] }
+sub content_length { (shift->_header('Content-Length', @_))[0] }
+
+sub user_agent { (shift->_header('User-Agent', @_))[0] }
+sub server { (shift->_header('Server', @_))[0] }
+
+sub from { (shift->_header('From', @_))[0] }
+sub warning { (shift->_header('Warning', @_))[0] }
+
+sub www_authenticate { (shift->_header('WWW-Authenticate', @_))[0] }
+sub authorization { (shift->_header('Authorization', @_))[0] }
+
+sub proxy_authenticate { (shift->_header('Proxy-Authenticate', @_))[0] }
+sub proxy_authorization { (shift->_header('Proxy-Authorization', @_))[0] }
+
+sub authorization_basic { shift->_basic_auth("Authorization", @_) }
+sub proxy_authorization_basic { shift->_basic_auth("Proxy-Authorization", @_) }
+
+sub _basic_auth {
+ require MIME::Base64;
+ my($self, $h, $user, $passwd) = @_;
+ my($old) = $self->_header($h);
+ if (defined $user) {
+ Carp::croak("Basic authorization user name can't contain ':'")
+ if $user =~ /:/;
+ $passwd = '' unless defined $passwd;
+ $self->_header($h => 'Basic ' .
+ MIME::Base64::encode("$user:$passwd", ''));
+ }
+ if (defined $old && $old =~ s/^\s*Basic\s+//) {
+ my $val = MIME::Base64::decode($old);
+ return $val unless wantarray;
+ return split(/:/, $val, 2);
+ }
+ return;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Headers - Class encapsulating HTTP Message headers
+
+=head1 SYNOPSIS
+
+ require HTTP::Headers;
+ $h = HTTP::Headers->new;
+
+ $h->header('Content-Type' => 'text/plain'); # set
+ $ct = $h->header('Content-Type'); # get
+ $h->remove_header('Content-Type'); # delete
+
+=head1 DESCRIPTION
+
+The C<HTTP::Headers> class encapsulates HTTP-style message headers.
+The headers consist of attribute-value pairs also called fields, which
+may be repeated, and which are printed in a particular order. The
+field names are cases insensitive.
+
+Instances of this class are usually created as member variables of the
+C<HTTP::Request> and C<HTTP::Response> classes, internal to the
+library.
+
+The following methods are available:
+
+=over 4
+
+=item $h = HTTP::Headers->new
+
+Constructs a new C<HTTP::Headers> object. You might pass some initial
+attribute-value pairs as parameters to the constructor. I<E.g.>:
+
+ $h = HTTP::Headers->new(
+ Date => 'Thu, 03 Feb 1994 00:00:00 GMT',
+ Content_Type => 'text/html; version=3.2',
+ Content_Base => 'http://www.perl.org/');
+
+The constructor arguments are passed to the C<header> method which is
+described below.
+
+=item $h->clone
+
+Returns a copy of this C<HTTP::Headers> object.
+
+=item $h->header( $field )
+
+=item $h->header( $field => $value )
+
+=item $h->header( $f1 => $v1, $f2 => $v2, ... )
+
+Get or set the value of one or more header fields. The header field
+name ($field) is not case sensitive. To make the life easier for perl
+users who wants to avoid quoting before the => operator, you can use
+'_' as a replacement for '-' in header names.
+
+The header() method accepts multiple ($field => $value) pairs, which
+means that you can update several fields with a single invocation.
+
+The $value argument may be a plain string or a reference to an array
+of strings for a multi-valued field. If the $value is provided as
+C<undef> then the field is removed. If the $value is not given, then
+that header field will remain unchanged.
+
+The old value (or values) of the last of the header fields is returned.
+If no such field exists C<undef> will be returned.
+
+A multi-valued field will be returned as separate values in list
+context and will be concatenated with ", " as separator in scalar
+context. The HTTP spec (RFC 2616) promise that joining multiple
+values in this way will not change the semantic of a header field, but
+in practice there are cases like old-style Netscape cookies (see
+L<HTTP::Cookies>) where "," is used as part of the syntax of a single
+field value.
+
+Examples:
+
+ $header->header(MIME_Version => '1.0',
+ User_Agent => 'My-Web-Client/0.01');
+ $header->header(Accept => "text/html, text/plain, image/*");
+ $header->header(Accept => [qw(text/html text/plain image/*)]);
+ @accepts = $header->header('Accept'); # get multiple values
+ $accepts = $header->header('Accept'); # get values as a single string
+
+=item $h->push_header( $field => $value )
+
+=item $h->push_header( $f1 => $v1, $f2 => $v2, ... )
+
+Add a new field value for the specified header field. Previous values
+for the same field are retained.
+
+As for the header() method, the field name ($field) is not case
+sensitive and '_' can be used as a replacement for '-'.
+
+The $value argument may be a scalar or a reference to a list of
+scalars.
+
+ $header->push_header(Accept => 'image/jpeg');
+ $header->push_header(Accept => [map "image/$_", qw(gif png tiff)]);
+
+=item $h->init_header( $field => $value )
+
+Set the specified header to the given value, but only if no previous
+value for that field is set.
+
+The header field name ($field) is not case sensitive and '_'
+can be used as a replacement for '-'.
+
+The $value argument may be a scalar or a reference to a list of
+scalars.
+
+=item $h->remove_header( $field, ... )
+
+This function removes the header fields with the specified names.
+
+The header field names ($field) are not case sensitive and '_'
+can be used as a replacement for '-'.
+
+The return value is the values of the fields removed. In scalar
+context the number of fields removed is returned.
+
+Note that if you pass in multiple field names then it is generally not
+possible to tell which of the returned values belonged to which field.
+
+=item $h->remove_content_headers
+
+This will remove all the header fields used to describe the content of
+a message. All header field names prefixed with C<Content-> fall
+into this category, as well as C<Allow>, C<Expires> and
+C<Last-Modified>. RFC 2616 denotes these fields as I<Entity Header
+Fields>.
+
+The return value is a new C<HTTP::Headers> object that contains the
+removed headers only.
+
+=item $h->clear
+
+This will remove all header fields.
+
+=item $h->header_field_names
+
+Returns the list of distinct names for the fields present in the
+header. The field names have case as suggested by HTTP spec, and the
+names are returned in the recommended "Good Practice" order.
+
+In scalar context return the number of distinct field names.
+
+=item $h->scan( \&process_header_field )
+
+Apply a subroutine to each header field in turn. The callback routine
+is called with two parameters; the name of the field and a single
+value (a string). If a header field is multi-valued, then the
+routine is called once for each value. The field name passed to the
+callback routine has case as suggested by HTTP spec, and the headers
+will be visited in the recommended "Good Practice" order.
+
+Any return values of the callback routine are ignored. The loop can
+be broken by raising an exception (C<die>), but the caller of scan()
+would have to trap the exception itself.
+
+=item $h->as_string
+
+=item $h->as_string( $eol )
+
+Return the header fields as a formatted MIME header. Since it
+internally uses the C<scan> method to build the string, the result
+will use case as suggested by HTTP spec, and it will follow
+recommended "Good Practice" of ordering the header fields. Long header
+values are not folded.
+
+The optional $eol parameter specifies the line ending sequence to
+use. The default is "\n". Embedded "\n" characters in header field
+values will be substituted with this line ending sequence.
+
+=back
+
+=head1 CONVENIENCE METHODS
+
+The most frequently used headers can also be accessed through the
+following convenience methods. Most of these methods can both be used to read
+and to set the value of a header. The header value is set if you pass
+an argument to the method. The old header value is always returned.
+If the given header did not exist then C<undef> is returned.
+
+Methods that deal with dates/times always convert their value to system
+time (seconds since Jan 1, 1970) and they also expect this kind of
+value when the header value is set.
+
+=over 4
+
+=item $h->date
+
+This header represents the date and time at which the message was
+originated. I<E.g.>:
+
+ $h->date(time); # set current date
+
+=item $h->expires
+
+This header gives the date and time after which the entity should be
+considered stale.
+
+=item $h->if_modified_since
+
+=item $h->if_unmodified_since
+
+These header fields are used to make a request conditional. If the requested
+resource has (or has not) been modified since the time specified in this field,
+then the server will return a C<304 Not Modified> response instead of
+the document itself.
+
+=item $h->last_modified
+
+This header indicates the date and time at which the resource was last
+modified. I<E.g.>:
+
+ # check if document is more than 1 hour old
+ if (my $last_mod = $h->last_modified) {
+ if ($last_mod < time - 60*60) {
+ ...
+ }
+ }
+
+=item $h->content_type
+
+The Content-Type header field indicates the media type of the message
+content. I<E.g.>:
+
+ $h->content_type('text/html');
+
+The value returned will be converted to lower case, and potential
+parameters will be chopped off and returned as a separate value if in
+an array context. If there is no such header field, then the empty
+string is returned. This makes it safe to do the following:
+
+ if ($h->content_type eq 'text/html') {
+ # we enter this place even if the real header value happens to
+ # be 'TEXT/HTML; version=3.0'
+ ...
+ }
+
+=item $h->content_type_charset
+
+Returns the upper-cased charset specified in the Content-Type header. In list
+context return the lower-cased bare content type followed by the upper-cased
+charset. Both values will be C<undef> if not specified in the header.
+
+=item $h->content_is_text
+
+Returns TRUE if the Content-Type header field indicate that the
+content is textual.
+
+=item $h->content_is_html
+
+Returns TRUE if the Content-Type header field indicate that the
+content is some kind of HTML (including XHTML). This method can't be
+used to set Content-Type.
+
+=item $h->content_is_xhtml
+
+Returns TRUE if the Content-Type header field indicate that the
+content is XHTML. This method can't be used to set Content-Type.
+
+=item $h->content_is_xml
+
+Returns TRUE if the Content-Type header field indicate that the
+content is XML. This method can't be used to set Content-Type.
+
+=item $h->content_encoding
+
+The Content-Encoding header field is used as a modifier to the
+media type. When present, its value indicates what additional
+encoding mechanism has been applied to the resource.
+
+=item $h->content_length
+
+A decimal number indicating the size in bytes of the message content.
+
+=item $h->content_language
+
+The natural language(s) of the intended audience for the message
+content. The value is one or more language tags as defined by RFC
+1766. Eg. "no" for some kind of Norwegian and "en-US" for English the
+way it is written in the US.
+
+=item $h->title
+
+The title of the document. In libwww-perl this header will be
+initialized automatically from the E<lt>TITLE>...E<lt>/TITLE> element
+of HTML documents. I<This header is no longer part of the HTTP
+standard.>
+
+=item $h->user_agent
+
+This header field is used in request messages and contains information
+about the user agent originating the request. I<E.g.>:
+
+ $h->user_agent('Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0)');
+
+=item $h->server
+
+The server header field contains information about the software being
+used by the originating server program handling the request.
+
+=item $h->from
+
+This header should contain an Internet e-mail address for the human
+user who controls the requesting user agent. The address should be
+machine-usable, as defined by RFC822. E.g.:
+
+ $h->from('King Kong <king@kong.com>');
+
+I<This header is no longer part of the HTTP standard.>
+
+=item $h->referer
+
+Used to specify the address (URI) of the document from which the
+requested resource address was obtained.
+
+The "Free On-line Dictionary of Computing" as this to say about the
+word I<referer>:
+
+ <World-Wide Web> A misspelling of "referrer" which
+ somehow made it into the {HTTP} standard. A given {web
+ page}'s referer (sic) is the {URL} of whatever web page
+ contains the link that the user followed to the current
+ page. Most browsers pass this information as part of a
+ request.
+
+ (1998-10-19)
+
+By popular demand C<referrer> exists as an alias for this method so you
+can avoid this misspelling in your programs and still send the right
+thing on the wire.
+
+When setting the referrer, this method removes the fragment from the
+given URI if it is present, as mandated by RFC2616. Note that
+the removal does I<not> happen automatically if using the header(),
+push_header() or init_header() methods to set the referrer.
+
+=item $h->www_authenticate
+
+This header must be included as part of a C<401 Unauthorized> response.
+The field value consist of a challenge that indicates the
+authentication scheme and parameters applicable to the requested URI.
+
+=item $h->proxy_authenticate
+
+This header must be included in a C<407 Proxy Authentication Required>
+response.
+
+=item $h->authorization
+
+=item $h->proxy_authorization
+
+A user agent that wishes to authenticate itself with a server or a
+proxy, may do so by including these headers.
+
+=item $h->authorization_basic
+
+This method is used to get or set an authorization header that use the
+"Basic Authentication Scheme". In array context it will return two
+values; the user name and the password. In scalar context it will
+return I<"uname:password"> as a single string value.
+
+When used to set the header value, it expects two arguments. I<E.g.>:
+
+ $h->authorization_basic($uname, $password);
+
+The method will croak if the $uname contains a colon ':'.
+
+=item $h->proxy_authorization_basic
+
+Same as authorization_basic() but will set the "Proxy-Authorization"
+header instead.
+
+=back
+
+=head1 NON-CANONICALIZED FIELD NAMES
+
+The header field name spelling is normally canonicalized including the
+'_' to '-' translation. There are some application where this is not
+appropriate. Prefixing field names with ':' allow you to force a
+specific spelling. For example if you really want a header field name
+to show up as C<foo_bar> instead of "Foo-Bar", you might set it like
+this:
+
+ $h->header(":foo_bar" => 1);
+
+These field names are returned with the ':' intact for
+$h->header_field_names and the $h->scan callback, but the colons do
+not show in $h->as_string.
+
+=head1 COPYRIGHT
+
+Copyright 1995-2005 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Headers::Auth;
+
+use strict;
+use vars qw($VERSION);
+$VERSION = "5.817";
+
+use HTTP::Headers;
+
+package HTTP::Headers;
+
+BEGIN {
+ # we provide a new (and better) implementations below
+ undef(&www_authenticate);
+ undef(&proxy_authenticate);
+}
+
+require HTTP::Headers::Util;
+
+sub _parse_authenticate
+{
+ my @ret;
+ for (HTTP::Headers::Util::split_header_words(@_)) {
+ if (!defined($_->[1])) {
+ # this is a new auth scheme
+ push(@ret, shift(@$_) => {});
+ shift @$_;
+ }
+ if (@ret) {
+ # this a new parameter pair for the last auth scheme
+ while (@$_) {
+ my $k = shift @$_;
+ my $v = shift @$_;
+ $ret[-1]{$k} = $v;
+ }
+ }
+ else {
+ # something wrong, parameter pair without any scheme seen
+ # IGNORE
+ }
+ }
+ @ret;
+}
+
+sub _authenticate
+{
+ my $self = shift;
+ my $header = shift;
+ my @old = $self->_header($header);
+ if (@_) {
+ $self->remove_header($header);
+ my @new = @_;
+ while (@new) {
+ my $a_scheme = shift(@new);
+ if ($a_scheme =~ /\s/) {
+ # assume complete valid value, pass it through
+ $self->push_header($header, $a_scheme);
+ }
+ else {
+ my @param;
+ if (@new) {
+ my $p = $new[0];
+ if (ref($p) eq "ARRAY") {
+ @param = @$p;
+ shift(@new);
+ }
+ elsif (ref($p) eq "HASH") {
+ @param = %$p;
+ shift(@new);
+ }
+ }
+ my $val = ucfirst(lc($a_scheme));
+ if (@param) {
+ my $sep = " ";
+ while (@param) {
+ my $k = shift @param;
+ my $v = shift @param;
+ if ($v =~ /[^0-9a-zA-Z]/ || lc($k) eq "realm") {
+ # must quote the value
+ $v =~ s,([\\\"]),\\$1,g;
+ $v = qq("$v");
+ }
+ $val .= "$sep$k=$v";
+ $sep = ", ";
+ }
+ }
+ $self->push_header($header, $val);
+ }
+ }
+ }
+ return unless defined wantarray;
+ wantarray ? _parse_authenticate(@old) : join(", ", @old);
+}
+
+
+sub www_authenticate { shift->_authenticate("WWW-Authenticate", @_) }
+sub proxy_authenticate { shift->_authenticate("Proxy-Authenticate", @_) }
+
+1;
--- /dev/null
+package HTTP::Headers::ETag;
+
+use strict;
+use vars qw($VERSION);
+$VERSION = "5.810";
+
+require HTTP::Date;
+
+require HTTP::Headers;
+package HTTP::Headers;
+
+sub _etags
+{
+ my $self = shift;
+ my $header = shift;
+ my @old = _split_etag_list($self->_header($header));
+ if (@_) {
+ $self->_header($header => join(", ", _split_etag_list(@_)));
+ }
+ wantarray ? @old : join(", ", @old);
+}
+
+sub etag { shift->_etags("ETag", @_); }
+sub if_match { shift->_etags("If-Match", @_); }
+sub if_none_match { shift->_etags("If-None-Match", @_); }
+
+sub if_range {
+ # Either a date or an entity-tag
+ my $self = shift;
+ my @old = $self->_header("If-Range");
+ if (@_) {
+ my $new = shift;
+ if (!defined $new) {
+ $self->remove_header("If-Range");
+ }
+ elsif ($new =~ /^\d+$/) {
+ $self->_date_header("If-Range", $new);
+ }
+ else {
+ $self->_etags("If-Range", $new);
+ }
+ }
+ return unless defined(wantarray);
+ for (@old) {
+ my $t = HTTP::Date::str2time($_);
+ $_ = $t if $t;
+ }
+ wantarray ? @old : join(", ", @old);
+}
+
+
+# Split a list of entity tag values. The return value is a list
+# consisting of one element per entity tag. Suitable for parsing
+# headers like C<If-Match>, C<If-None-Match>. You might even want to
+# use it on C<ETag> and C<If-Range> entity tag values, because it will
+# normalize them to the common form.
+#
+# entity-tag = [ weak ] opaque-tag
+# weak = "W/"
+# opaque-tag = quoted-string
+
+
+sub _split_etag_list
+{
+ my(@val) = @_;
+ my @res;
+ for (@val) {
+ while (length) {
+ my $weak = "";
+ $weak = "W/" if s,^\s*[wW]/,,;
+ my $etag = "";
+ if (s/^\s*(\"[^\"\\]*(?:\\.[^\"\\]*)*\")//) {
+ push(@res, "$weak$1");
+ }
+ elsif (s/^\s*,//) {
+ push(@res, qq(W/"")) if $weak;
+ }
+ elsif (s/^\s*([^,\s]+)//) {
+ $etag = $1;
+ $etag =~ s/([\"\\])/\\$1/g;
+ push(@res, qq($weak"$etag"));
+ }
+ elsif (s/^\s+// || !length) {
+ push(@res, qq(W/"")) if $weak;
+ }
+ else {
+ die "This should not happen: '$_'";
+ }
+ }
+ }
+ @res;
+}
+
+1;
--- /dev/null
+package HTTP::Headers::Util;
+
+use strict;
+use vars qw($VERSION @ISA @EXPORT_OK);
+
+$VERSION = "5.817";
+
+require Exporter;
+@ISA=qw(Exporter);
+
+@EXPORT_OK=qw(split_header_words _split_header_words join_header_words);
+
+
+
+sub split_header_words {
+ my @res = &_split_header_words;
+ for my $arr (@res) {
+ for (my $i = @$arr - 2; $i >= 0; $i -= 2) {
+ $arr->[$i] = lc($arr->[$i]);
+ }
+ }
+ return @res;
+}
+
+sub _split_header_words
+{
+ my(@val) = @_;
+ my @res;
+ for (@val) {
+ my @cur;
+ while (length) {
+ if (s/^\s*(=*[^\s=;,]+)//) { # 'token' or parameter 'attribute'
+ push(@cur, $1);
+ # a quoted value
+ if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
+ my $val = $1;
+ $val =~ s/\\(.)/$1/g;
+ push(@cur, $val);
+ # some unquoted value
+ }
+ elsif (s/^\s*=\s*([^;,\s]*)//) {
+ my $val = $1;
+ $val =~ s/\s+$//;
+ push(@cur, $val);
+ # no value, a lone token
+ }
+ else {
+ push(@cur, undef);
+ }
+ }
+ elsif (s/^\s*,//) {
+ push(@res, [@cur]) if @cur;
+ @cur = ();
+ }
+ elsif (s/^\s*;// || s/^\s+//) {
+ # continue
+ }
+ else {
+ die "This should not happen: '$_'";
+ }
+ }
+ push(@res, \@cur) if @cur;
+ }
+ @res;
+}
+
+
+sub join_header_words
+{
+ @_ = ([@_]) if @_ && !ref($_[0]);
+ my @res;
+ for (@_) {
+ my @cur = @$_;
+ my @attr;
+ while (@cur) {
+ my $k = shift @cur;
+ my $v = shift @cur;
+ if (defined $v) {
+ if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) {
+ $v =~ s/([\"\\])/\\$1/g; # escape " and \
+ $k .= qq(="$v");
+ }
+ else {
+ # token
+ $k .= "=$v";
+ }
+ }
+ push(@attr, $k);
+ }
+ push(@res, join("; ", @attr)) if @attr;
+ }
+ join(", ", @res);
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Headers::Util - Header value parsing utility functions
+
+=head1 SYNOPSIS
+
+ use HTTP::Headers::Util qw(split_header_words);
+ @values = split_header_words($h->header("Content-Type"));
+
+=head1 DESCRIPTION
+
+This module provides a few functions that helps parsing and
+construction of valid HTTP header values. None of the functions are
+exported by default.
+
+The following functions are available:
+
+=over 4
+
+
+=item split_header_words( @header_values )
+
+This function will parse the header values given as argument into a
+list of anonymous arrays containing key/value pairs. The function
+knows how to deal with ",", ";" and "=" as well as quoted values after
+"=". A list of space separated tokens are parsed as if they were
+separated by ";".
+
+If the @header_values passed as argument contains multiple values,
+then they are treated as if they were a single value separated by
+comma ",".
+
+This means that this function is useful for parsing header fields that
+follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
+the requirement for tokens).
+
+ headers = #header
+ header = (token | parameter) *( [";"] (token | parameter))
+
+ token = 1*<any CHAR except CTLs or separators>
+ separators = "(" | ")" | "<" | ">" | "@"
+ | "," | ";" | ":" | "\" | <">
+ | "/" | "[" | "]" | "?" | "="
+ | "{" | "}" | SP | HT
+
+ quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
+ qdtext = <any TEXT except <">>
+ quoted-pair = "\" CHAR
+
+ parameter = attribute "=" value
+ attribute = token
+ value = token | quoted-string
+
+Each I<header> is represented by an anonymous array of key/value
+pairs. The keys will be all be forced to lower case.
+The value for a simple token (not part of a parameter) is C<undef>.
+Syntactically incorrect headers will not necessary be parsed as you
+would want.
+
+This is easier to describe with some examples:
+
+ split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz');
+ split_header_words('text/html; charset="iso-8859-1"');
+ split_header_words('Basic realm="\\"foo\\\\bar\\""');
+
+will return
+
+ [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
+ ['text/html' => undef, charset => 'iso-8859-1']
+ [basic => undef, realm => "\"foo\\bar\""]
+
+If you don't want the function to convert tokens and attribute keys to
+lower case you can call it as C<_split_header_words> instead (with a
+leading underscore).
+
+=item join_header_words( @arrays )
+
+This will do the opposite of the conversion done by split_header_words().
+It takes a list of anonymous arrays as arguments (or a list of
+key/value pairs) and produces a single header value. Attribute values
+are quoted if needed.
+
+Example:
+
+ join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
+ join_header_words("text/plain" => undef, charset => "iso-8859/1");
+
+will both return the string:
+
+ text/plain; charset="iso-8859/1"
+
+=back
+
+=head1 COPYRIGHT
+
+Copyright 1997-1998, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Message;
+
+use strict;
+use vars qw($VERSION $AUTOLOAD);
+$VERSION = "5.835";
+
+require HTTP::Headers;
+require Carp;
+
+my $CRLF = "\015\012"; # "\r\n" is not portable
+$HTTP::URI_CLASS ||= $ENV{PERL_HTTP_URI_CLASS} || "URI";
+eval "require $HTTP::URI_CLASS"; die $@ if $@;
+
+*_utf8_downgrade = defined(&utf8::downgrade) ?
+ sub {
+ utf8::downgrade($_[0], 1) or
+ Carp::croak("HTTP::Message content must be bytes")
+ }
+ :
+ sub {
+ };
+
+sub new
+{
+ my($class, $header, $content) = @_;
+ if (defined $header) {
+ Carp::croak("Bad header argument") unless ref $header;
+ if (ref($header) eq "ARRAY") {
+ $header = HTTP::Headers->new(@$header);
+ }
+ else {
+ $header = $header->clone;
+ }
+ }
+ else {
+ $header = HTTP::Headers->new;
+ }
+ if (defined $content) {
+ _utf8_downgrade($content);
+ }
+ else {
+ $content = '';
+ }
+
+ bless {
+ '_headers' => $header,
+ '_content' => $content,
+ }, $class;
+}
+
+
+sub parse
+{
+ my($class, $str) = @_;
+
+ my @hdr;
+ while (1) {
+ if ($str =~ s/^([^\s:]+)[ \t]*: ?(.*)\n?//) {
+ push(@hdr, $1, $2);
+ $hdr[-1] =~ s/\r\z//;
+ }
+ elsif (@hdr && $str =~ s/^([ \t].*)\n?//) {
+ $hdr[-1] .= "\n$1";
+ $hdr[-1] =~ s/\r\z//;
+ }
+ else {
+ $str =~ s/^\r?\n//;
+ last;
+ }
+ }
+ local $HTTP::Headers::TRANSLATE_UNDERSCORE;
+ new($class, \@hdr, $str);
+}
+
+
+sub clone
+{
+ my $self = shift;
+ my $clone = HTTP::Message->new($self->headers,
+ $self->content);
+ $clone->protocol($self->protocol);
+ $clone;
+}
+
+
+sub clear {
+ my $self = shift;
+ $self->{_headers}->clear;
+ $self->content("");
+ delete $self->{_parts};
+ return;
+}
+
+
+sub protocol {
+ shift->_elem('_protocol', @_);
+}
+
+sub headers {
+ my $self = shift;
+
+ # recalculation of _content might change headers, so we
+ # need to force it now
+ $self->_content unless exists $self->{_content};
+
+ $self->{_headers};
+}
+
+sub headers_as_string {
+ shift->headers->as_string(@_);
+}
+
+
+sub content {
+
+ my $self = $_[0];
+ if (defined(wantarray)) {
+ $self->_content unless exists $self->{_content};
+ my $old = $self->{_content};
+ $old = $$old if ref($old) eq "SCALAR";
+ &_set_content if @_ > 1;
+ return $old;
+ }
+
+ if (@_ > 1) {
+ &_set_content;
+ }
+ else {
+ Carp::carp("Useless content call in void context") if $^W;
+ }
+}
+
+
+sub _set_content {
+ my $self = $_[0];
+ _utf8_downgrade($_[1]);
+ if (!ref($_[1]) && ref($self->{_content}) eq "SCALAR") {
+ ${$self->{_content}} = $_[1];
+ }
+ else {
+ die "Can't set content to be a scalar reference" if ref($_[1]) eq "SCALAR";
+ $self->{_content} = $_[1];
+ delete $self->{_content_ref};
+ }
+ delete $self->{_parts} unless $_[2];
+}
+
+
+sub add_content
+{
+ my $self = shift;
+ $self->_content unless exists $self->{_content};
+ my $chunkref = \$_[0];
+ $chunkref = $$chunkref if ref($$chunkref); # legacy
+
+ _utf8_downgrade($$chunkref);
+
+ my $ref = ref($self->{_content});
+ if (!$ref) {
+ $self->{_content} .= $$chunkref;
+ }
+ elsif ($ref eq "SCALAR") {
+ ${$self->{_content}} .= $$chunkref;
+ }
+ else {
+ Carp::croak("Can't append to $ref content");
+ }
+ delete $self->{_parts};
+}
+
+sub add_content_utf8 {
+ my($self, $buf) = @_;
+ utf8::upgrade($buf);
+ utf8::encode($buf);
+ $self->add_content($buf);
+}
+
+sub content_ref
+{
+ my $self = shift;
+ $self->_content unless exists $self->{_content};
+ delete $self->{_parts};
+ my $old = \$self->{_content};
+ my $old_cref = $self->{_content_ref};
+ if (@_) {
+ my $new = shift;
+ Carp::croak("Setting content_ref to a non-ref") unless ref($new);
+ delete $self->{_content}; # avoid modifying $$old
+ $self->{_content} = $new;
+ $self->{_content_ref}++;
+ }
+ $old = $$old if $old_cref;
+ return $old;
+}
+
+
+sub content_charset
+{
+ my $self = shift;
+ if (my $charset = $self->content_type_charset) {
+ return $charset;
+ }
+
+ # time to start guessing
+ my $cref = $self->decoded_content(ref => 1, charset => "none");
+
+ # Unicode BOM
+ for ($$cref) {
+ return "UTF-8" if /^\xEF\xBB\xBF/;
+ return "UTF-32-LE" if /^\xFF\xFE\x00\x00/;
+ return "UTF-32-BE" if /^\x00\x00\xFE\xFF/;
+ return "UTF-16-LE" if /^\xFF\xFE/;
+ return "UTF-16-BE" if /^\xFE\xFF/;
+ }
+
+ if ($self->content_is_xml) {
+ # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing
+ # XML entity not accompanied by external encoding information and not
+ # in UTF-8 or UTF-16 encoding must begin with an XML encoding declaration,
+ # in which the first characters must be '<?xml'
+ for ($$cref) {
+ return "UTF-32-BE" if /^\x00\x00\x00</;
+ return "UTF-32-LE" if /^<\x00\x00\x00/;
+ return "UTF-16-BE" if /^(?:\x00\s)*\x00</;
+ return "UTF-16-LE" if /^(?:\s\x00)*<\x00/;
+ if (/^\s*(<\?xml[^\x00]*?\?>)/) {
+ if ($1 =~ /\sencoding\s*=\s*(["'])(.*?)\1/) {
+ my $enc = $2;
+ $enc =~ s/^\s+//; $enc =~ s/\s+\z//;
+ return $enc if $enc;
+ }
+ }
+ }
+ return "UTF-8";
+ }
+ elsif ($self->content_is_html) {
+ # look for <META charset="..."> or <META content="...">
+ # http://dev.w3.org/html5/spec/Overview.html#determining-the-character-encoding
+ my $charset;
+ require HTML::Parser;
+ my $p = HTML::Parser->new(
+ start_h => [sub {
+ my($tag, $attr, $self) = @_;
+ $charset = $attr->{charset};
+ unless ($charset) {
+ # look at $attr->{content} ...
+ if (my $c = $attr->{content}) {
+ require HTTP::Headers::Util;
+ my @v = HTTP::Headers::Util::split_header_words($c);
+ return unless @v;
+ my($ct, undef, %ct_param) = @{$v[0]};
+ $charset = $ct_param{charset};
+ }
+ return unless $charset;
+ }
+ if ($charset =~ /^utf-?16/i) {
+ # converted document, assume UTF-8
+ $charset = "UTF-8";
+ }
+ $self->eof;
+ }, "tagname, attr, self"],
+ report_tags => [qw(meta)],
+ utf8_mode => 1,
+ );
+ $p->parse($$cref);
+ return $charset if $charset;
+ }
+ if ($self->content_type =~ /^text\//) {
+ for ($$cref) {
+ if (length) {
+ return "US-ASCII" unless /[\x80-\xFF]/;
+ require Encode;
+ eval {
+ Encode::decode_utf8($_, Encode::FB_CROAK());
+ };
+ return "UTF-8" unless $@;
+ return "ISO-8859-1";
+ }
+ }
+ }
+
+ return undef;
+}
+
+
+sub decoded_content
+{
+ my($self, %opt) = @_;
+ my $content_ref;
+ my $content_ref_iscopy;
+
+ eval {
+ $content_ref = $self->content_ref;
+ die "Can't decode ref content" if ref($content_ref) ne "SCALAR";
+
+ if (my $h = $self->header("Content-Encoding")) {
+ $h =~ s/^\s+//;
+ $h =~ s/\s+$//;
+ for my $ce (reverse split(/\s*,\s*/, lc($h))) {
+ next unless $ce;
+ next if $ce eq "identity";
+ if ($ce eq "gzip" || $ce eq "x-gzip") {
+ require IO::Uncompress::Gunzip;
+ my $output;
+ IO::Uncompress::Gunzip::gunzip($content_ref, \$output, Transparent => 0)
+ or die "Can't gunzip content: $IO::Uncompress::Gunzip::GunzipError";
+ $content_ref = \$output;
+ $content_ref_iscopy++;
+ }
+ elsif ($ce eq "x-bzip2") {
+ require IO::Uncompress::Bunzip2;
+ my $output;
+ IO::Uncompress::Bunzip2::bunzip2($content_ref, \$output, Transparent => 0)
+ or die "Can't bunzip content: $IO::Uncompress::Bunzip2::Bunzip2Error";
+ $content_ref = \$output;
+ $content_ref_iscopy++;
+ }
+ elsif ($ce eq "deflate") {
+ require IO::Uncompress::Inflate;
+ my $output;
+ my $status = IO::Uncompress::Inflate::inflate($content_ref, \$output, Transparent => 0);
+ my $error = $IO::Uncompress::Inflate::InflateError;
+ unless ($status) {
+ # "Content-Encoding: deflate" is supposed to mean the
+ # "zlib" format of RFC 1950, but Microsoft got that
+ # wrong, so some servers sends the raw compressed
+ # "deflate" data. This tries to inflate this format.
+ $output = undef;
+ require IO::Uncompress::RawInflate;
+ unless (IO::Uncompress::RawInflate::rawinflate($content_ref, \$output)) {
+ $self->push_header("Client-Warning" =>
+ "Could not raw inflate content: $IO::Uncompress::RawInflate::RawInflateError");
+ $output = undef;
+ }
+ }
+ die "Can't inflate content: $error" unless defined $output;
+ $content_ref = \$output;
+ $content_ref_iscopy++;
+ }
+ elsif ($ce eq "compress" || $ce eq "x-compress") {
+ die "Can't uncompress content";
+ }
+ elsif ($ce eq "base64") { # not really C-T-E, but should be harmless
+ require MIME::Base64;
+ $content_ref = \MIME::Base64::decode($$content_ref);
+ $content_ref_iscopy++;
+ }
+ elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
+ require MIME::QuotedPrint;
+ $content_ref = \MIME::QuotedPrint::decode($$content_ref);
+ $content_ref_iscopy++;
+ }
+ else {
+ die "Don't know how to decode Content-Encoding '$ce'";
+ }
+ }
+ }
+
+ if ($self->content_is_text || (my $is_xml = $self->content_is_xml)) {
+ my $charset = lc(
+ $opt{charset} ||
+ $self->content_type_charset ||
+ $opt{default_charset} ||
+ $self->content_charset ||
+ "ISO-8859-1"
+ );
+ unless ($charset =~ /^(?:none|us-ascii|iso-8859-1)\z/) {
+ require Encode;
+ if (do{my $v = $Encode::VERSION; $v =~ s/_//g; $v} < 2.0901 &&
+ !$content_ref_iscopy)
+ {
+ # LEAVE_SRC did not work before Encode-2.0901
+ my $copy = $$content_ref;
+ $content_ref = \$copy;
+ $content_ref_iscopy++;
+ }
+ eval {
+ $content_ref = \Encode::decode($charset, $$content_ref,
+ ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC());
+ };
+ if ($@) {
+ my $retried;
+ if ($@ =~ /^Unknown encoding/) {
+ my $alt_charset = lc($opt{alt_charset} || "");
+ if ($alt_charset && $charset ne $alt_charset) {
+ # Retry decoding with the alternative charset
+ $content_ref = \Encode::decode($alt_charset, $$content_ref,
+ ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC())
+ unless $alt_charset =~ /^(?:none|us-ascii|iso-8859-1)\z/;
+ $retried++;
+ }
+ }
+ die unless $retried;
+ }
+ die "Encode::decode() returned undef improperly" unless defined $$content_ref;
+ if ($is_xml) {
+ # Get rid of the XML encoding declaration if present
+ $$content_ref =~ s/^\x{FEFF}//;
+ if ($$content_ref =~ /^(\s*<\?xml[^\x00]*?\?>)/) {
+ substr($$content_ref, 0, length($1)) =~ s/\sencoding\s*=\s*(["']).*?\1//;
+ }
+ }
+ }
+ }
+ };
+ if ($@) {
+ Carp::croak($@) if $opt{raise_error};
+ return undef;
+ }
+
+ return $opt{ref} ? $content_ref : $$content_ref;
+}
+
+
+sub decodable
+{
+ # should match the Content-Encoding values that decoded_content can deal with
+ my $self = shift;
+ my @enc;
+ # XXX preferably we should determine if the modules are available without loading
+ # them here
+ eval {
+ require IO::Uncompress::Gunzip;
+ push(@enc, "gzip", "x-gzip");
+ };
+ eval {
+ require IO::Uncompress::Inflate;
+ require IO::Uncompress::RawInflate;
+ push(@enc, "deflate");
+ };
+ eval {
+ require IO::Uncompress::Bunzip2;
+ push(@enc, "x-bzip2");
+ };
+ # we don't care about announcing the 'identity', 'base64' and
+ # 'quoted-printable' stuff
+ return wantarray ? @enc : join(", ", @enc);
+}
+
+
+sub decode
+{
+ my $self = shift;
+ return 1 unless $self->header("Content-Encoding");
+ if (defined(my $content = $self->decoded_content(charset => "none"))) {
+ $self->remove_header("Content-Encoding", "Content-Length", "Content-MD5");
+ $self->content($content);
+ return 1;
+ }
+ return 0;
+}
+
+
+sub encode
+{
+ my($self, @enc) = @_;
+
+ Carp::croak("Can't encode multipart/* messages") if $self->content_type =~ m,^multipart/,;
+ Carp::croak("Can't encode message/* messages") if $self->content_type =~ m,^message/,;
+
+ return 1 unless @enc; # nothing to do
+
+ my $content = $self->content;
+ for my $encoding (@enc) {
+ if ($encoding eq "identity") {
+ # nothing to do
+ }
+ elsif ($encoding eq "base64") {
+ require MIME::Base64;
+ $content = MIME::Base64::encode($content);
+ }
+ elsif ($encoding eq "gzip" || $encoding eq "x-gzip") {
+ require IO::Compress::Gzip;
+ my $output;
+ IO::Compress::Gzip::gzip(\$content, \$output, Minimal => 1)
+ or die "Can't gzip content: $IO::Compress::Gzip::GzipError";
+ $content = $output;
+ }
+ elsif ($encoding eq "deflate") {
+ require IO::Compress::Deflate;
+ my $output;
+ IO::Compress::Deflate::deflate(\$content, \$output)
+ or die "Can't deflate content: $IO::Compress::Deflate::DeflateError";
+ $content = $output;
+ }
+ elsif ($encoding eq "x-bzip2") {
+ require IO::Compress::Bzip2;
+ my $output;
+ IO::Compress::Bzip2::bzip2(\$content, \$output)
+ or die "Can't bzip2 content: $IO::Compress::Bzip2::Bzip2Error";
+ $content = $output;
+ }
+ elsif ($encoding eq "rot13") { # for the fun of it
+ $content =~ tr/A-Za-z/N-ZA-Mn-za-m/;
+ }
+ else {
+ return 0;
+ }
+ }
+ my $h = $self->header("Content-Encoding");
+ unshift(@enc, $h) if $h;
+ $self->header("Content-Encoding", join(", ", @enc));
+ $self->remove_header("Content-Length", "Content-MD5");
+ $self->content($content);
+ return 1;
+}
+
+
+sub as_string
+{
+ my($self, $eol) = @_;
+ $eol = "\n" unless defined $eol;
+
+ # The calculation of content might update the headers
+ # so we need to do that first.
+ my $content = $self->content;
+
+ return join("", $self->{'_headers'}->as_string($eol),
+ $eol,
+ $content,
+ (@_ == 1 && length($content) &&
+ $content !~ /\n\z/) ? "\n" : "",
+ );
+}
+
+
+sub dump
+{
+ my($self, %opt) = @_;
+ my $content = $self->content;
+ my $chopped = 0;
+ if (!ref($content)) {
+ my $maxlen = $opt{maxlength};
+ $maxlen = 512 unless defined($maxlen);
+ if ($maxlen && length($content) > $maxlen * 1.1 + 3) {
+ $chopped = length($content) - $maxlen;
+ $content = substr($content, 0, $maxlen) . "...";
+ }
+
+ $content =~ s/\\/\\\\/g;
+ $content =~ s/\t/\\t/g;
+ $content =~ s/\r/\\r/g;
+
+ # no need for 3 digits in escape for these
+ $content =~ s/([\0-\11\13-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
+
+ $content =~ s/([\0-\11\13-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
+ $content =~ s/([^\12\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
+
+ # remaining whitespace
+ $content =~ s/( +)\n/("\\40" x length($1)) . "\n"/eg;
+ $content =~ s/(\n+)\n/("\\n" x length($1)) . "\n"/eg;
+ $content =~ s/\n\z/\\n/;
+
+ my $no_content = "(no content)";
+ if ($content eq $no_content) {
+ # escape our $no_content marker
+ $content =~ s/^(.)/sprintf('\\x%02X',ord($1))/eg;
+ }
+ elsif ($content eq "") {
+ $content = "(no content)";
+ }
+ }
+
+ my @dump;
+ push(@dump, $opt{preheader}) if $opt{preheader};
+ push(@dump, $self->{_headers}->as_string, $content);
+ push(@dump, "(+ $chopped more bytes not shown)") if $chopped;
+
+ my $dump = join("\n", @dump, "");
+ $dump =~ s/^/$opt{prefix}/gm if $opt{prefix};
+
+ print $dump unless defined wantarray;
+ return $dump;
+}
+
+
+sub parts {
+ my $self = shift;
+ if (defined(wantarray) && (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR")) {
+ $self->_parts;
+ }
+ my $old = $self->{_parts};
+ if (@_) {
+ my @parts = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_;
+ my $ct = $self->content_type || "";
+ if ($ct =~ m,^message/,) {
+ Carp::croak("Only one part allowed for $ct content")
+ if @parts > 1;
+ }
+ elsif ($ct !~ m,^multipart/,) {
+ $self->remove_content_headers;
+ $self->content_type("multipart/mixed");
+ }
+ $self->{_parts} = \@parts;
+ _stale_content($self);
+ }
+ return @$old if wantarray;
+ return $old->[0];
+}
+
+sub add_part {
+ my $self = shift;
+ if (($self->content_type || "") !~ m,^multipart/,) {
+ my $p = HTTP::Message->new($self->remove_content_headers,
+ $self->content(""));
+ $self->content_type("multipart/mixed");
+ $self->{_parts} = [];
+ if ($p->headers->header_field_names || $p->content ne "") {
+ push(@{$self->{_parts}}, $p);
+ }
+ }
+ elsif (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR") {
+ $self->_parts;
+ }
+
+ push(@{$self->{_parts}}, @_);
+ _stale_content($self);
+ return;
+}
+
+sub _stale_content {
+ my $self = shift;
+ if (ref($self->{_content}) eq "SCALAR") {
+ # must recalculate now
+ $self->_content;
+ }
+ else {
+ # just invalidate cache
+ delete $self->{_content};
+ delete $self->{_content_ref};
+ }
+}
+
+
+# delegate all other method calls the the headers object.
+sub AUTOLOAD
+{
+ my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2);
+
+ # We create the function here so that it will not need to be
+ # autoloaded the next time.
+ no strict 'refs';
+ *$method = sub { shift->headers->$method(@_) };
+ goto &$method;
+}
+
+
+sub DESTROY {} # avoid AUTOLOADing it
+
+
+# Private method to access members in %$self
+sub _elem
+{
+ my $self = shift;
+ my $elem = shift;
+ my $old = $self->{$elem};
+ $self->{$elem} = $_[0] if @_;
+ return $old;
+}
+
+
+# Create private _parts attribute from current _content
+sub _parts {
+ my $self = shift;
+ my $ct = $self->content_type;
+ if ($ct =~ m,^multipart/,) {
+ require HTTP::Headers::Util;
+ my @h = HTTP::Headers::Util::split_header_words($self->header("Content-Type"));
+ die "Assert" unless @h;
+ my %h = @{$h[0]};
+ if (defined(my $b = $h{boundary})) {
+ my $str = $self->content;
+ $str =~ s/\r?\n--\Q$b\E--\r?\n.*//s;
+ if ($str =~ s/(^|.*?\r?\n)--\Q$b\E\r?\n//s) {
+ $self->{_parts} = [map HTTP::Message->parse($_),
+ split(/\r?\n--\Q$b\E\r?\n/, $str)]
+ }
+ }
+ }
+ elsif ($ct eq "message/http") {
+ require HTTP::Request;
+ require HTTP::Response;
+ my $content = $self->content;
+ my $class = ($content =~ m,^(HTTP/.*)\n,) ?
+ "HTTP::Response" : "HTTP::Request";
+ $self->{_parts} = [$class->parse($content)];
+ }
+ elsif ($ct =~ m,^message/,) {
+ $self->{_parts} = [ HTTP::Message->parse($self->content) ];
+ }
+
+ $self->{_parts} ||= [];
+}
+
+
+# Create private _content attribute from current _parts
+sub _content {
+ my $self = shift;
+ my $ct = $self->{_headers}->header("Content-Type") || "multipart/mixed";
+ if ($ct =~ m,^\s*message/,i) {
+ _set_content($self, $self->{_parts}[0]->as_string($CRLF), 1);
+ return;
+ }
+
+ require HTTP::Headers::Util;
+ my @v = HTTP::Headers::Util::split_header_words($ct);
+ Carp::carp("Multiple Content-Type headers") if @v > 1;
+ @v = @{$v[0]};
+
+ my $boundary;
+ my $boundary_index;
+ for (my @tmp = @v; @tmp;) {
+ my($k, $v) = splice(@tmp, 0, 2);
+ if ($k eq "boundary") {
+ $boundary = $v;
+ $boundary_index = @v - @tmp - 1;
+ last;
+ }
+ }
+
+ my @parts = map $_->as_string($CRLF), @{$self->{_parts}};
+
+ my $bno = 0;
+ $boundary = _boundary() unless defined $boundary;
+ CHECK_BOUNDARY:
+ {
+ for (@parts) {
+ if (index($_, $boundary) >= 0) {
+ # must have a better boundary
+ $boundary = _boundary(++$bno);
+ redo CHECK_BOUNDARY;
+ }
+ }
+ }
+
+ if ($boundary_index) {
+ $v[$boundary_index] = $boundary;
+ }
+ else {
+ push(@v, boundary => $boundary);
+ }
+
+ $ct = HTTP::Headers::Util::join_header_words(@v);
+ $self->{_headers}->header("Content-Type", $ct);
+
+ _set_content($self, "--$boundary$CRLF" .
+ join("$CRLF--$boundary$CRLF", @parts) .
+ "$CRLF--$boundary--$CRLF",
+ 1);
+}
+
+
+sub _boundary
+{
+ my $size = shift || return "xYzZY";
+ require MIME::Base64;
+ my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), "");
+ $b =~ s/[\W]/X/g; # ensure alnum only
+ $b;
+}
+
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+HTTP::Message - HTTP style message (base class)
+
+=head1 SYNOPSIS
+
+ use base 'HTTP::Message';
+
+=head1 DESCRIPTION
+
+An C<HTTP::Message> object contains some headers and a content body.
+The following methods are available:
+
+=over 4
+
+=item $mess = HTTP::Message->new
+
+=item $mess = HTTP::Message->new( $headers )
+
+=item $mess = HTTP::Message->new( $headers, $content )
+
+This constructs a new message object. Normally you would want
+construct C<HTTP::Request> or C<HTTP::Response> objects instead.
+
+The optional $header argument should be a reference to an
+C<HTTP::Headers> object or a plain array reference of key/value pairs.
+If an C<HTTP::Headers> object is provided then a copy of it will be
+embedded into the constructed message, i.e. it will not be owned and
+can be modified afterwards without affecting the message.
+
+The optional $content argument should be a string of bytes.
+
+=item $mess = HTTP::Message->parse( $str )
+
+This constructs a new message object by parsing the given string.
+
+=item $mess->headers
+
+Returns the embedded C<HTTP::Headers> object.
+
+=item $mess->headers_as_string
+
+=item $mess->headers_as_string( $eol )
+
+Call the as_string() method for the headers in the
+message. This will be the same as
+
+ $mess->headers->as_string
+
+but it will make your program a whole character shorter :-)
+
+=item $mess->content
+
+=item $mess->content( $bytes )
+
+The content() method sets the raw content if an argument is given. If no
+argument is given the content is not touched. In either case the
+original raw content is returned.
+
+Note that the content should be a string of bytes. Strings in perl
+can contain characters outside the range of a byte. The C<Encode>
+module can be used to turn such strings into a string of bytes.
+
+=item $mess->add_content( $bytes )
+
+The add_content() methods appends more data bytes to the end of the
+current content buffer.
+
+=item $mess->add_content_utf8( $string )
+
+The add_content_utf8() method appends the UTF-8 bytes representing the
+string to the end of the current content buffer.
+
+=item $mess->content_ref
+
+=item $mess->content_ref( \$bytes )
+
+The content_ref() method will return a reference to content buffer string.
+It can be more efficient to access the content this way if the content
+is huge, and it can even be used for direct manipulation of the content,
+for instance:
+
+ ${$res->content_ref} =~ s/\bfoo\b/bar/g;
+
+This example would modify the content buffer in-place.
+
+If an argument is passed it will setup the content to reference some
+external source. The content() and add_content() methods
+will automatically dereference scalar references passed this way. For
+other references content() will return the reference itself and
+add_content() will refuse to do anything.
+
+=item $mess->content_charset
+
+This returns the charset used by the content in the message. The
+charset is either found as the charset attribute of the
+C<Content-Type> header or by guessing.
+
+See L<http://www.w3.org/TR/REC-html40/charset.html#spec-char-encoding>
+for details about how charset is determined.
+
+=item $mess->decoded_content( %options )
+
+Returns the content with any C<Content-Encoding> undone and the raw
+content encoded to perl's Unicode strings. If the C<Content-Encoding>
+or C<charset> of the message is unknown this method will fail by
+returning C<undef>.
+
+The following options can be specified.
+
+=over
+
+=item C<charset>
+
+This override the charset parameter for text content. The value
+C<none> can used to suppress decoding of the charset.
+
+=item C<default_charset>
+
+This override the default charset guessed by content_charset() or
+if that fails "ISO-8859-1".
+
+=item C<alt_charset>
+
+If decoding fails because the charset specified in the Content-Type header
+isn't recognized by Perl's Encode module, then try decoding using this charset
+instead of failing. The C<alt_charset> might be specified as C<none> to simply
+return the string without any decoding of charset as alternative.
+
+=item C<charset_strict>
+
+Abort decoding if malformed characters is found in the content. By
+default you get the substitution character ("\x{FFFD}") in place of
+malformed characters.
+
+=item C<raise_error>
+
+If TRUE then raise an exception if not able to decode content. Reason
+might be that the specified C<Content-Encoding> or C<charset> is not
+supported. If this option is FALSE, then decoded_content() will return
+C<undef> on errors, but will still set $@.
+
+=item C<ref>
+
+If TRUE then a reference to decoded content is returned. This might
+be more efficient in cases where the decoded content is identical to
+the raw content as no data copying is required in this case.
+
+=back
+
+=item $mess->decodable
+
+=item HTTP::Message::decodable()
+
+This returns the encoding identifiers that decoded_content() can
+process. In scalar context returns a comma separated string of
+identifiers.
+
+This value is suitable for initializing the C<Accept-Encoding> request
+header field.
+
+=item $mess->decode
+
+This method tries to replace the content of the message with the
+decoded version and removes the C<Content-Encoding> header. Returns
+TRUE if successful and FALSE if not.
+
+If the message does not have a C<Content-Encoding> header this method
+does nothing and returns TRUE.
+
+Note that the content of the message is still bytes after this method
+has been called and you still need to call decoded_content() if you
+want to process its content as a string.
+
+=item $mess->encode( $encoding, ... )
+
+Apply the given encodings to the content of the message. Returns TRUE
+if successful. The "identity" (non-)encoding is always supported; other
+currently supported encodings, subject to availability of required
+additional modules, are "gzip", "deflate", "x-bzip2" and "base64".
+
+A successful call to this function will set the C<Content-Encoding>
+header.
+
+Note that C<multipart/*> or C<message/*> messages can't be encoded and
+this method will croak if you try.
+
+=item $mess->parts
+
+=item $mess->parts( @parts )
+
+=item $mess->parts( \@parts )
+
+Messages can be composite, i.e. contain other messages. The composite
+messages have a content type of C<multipart/*> or C<message/*>. This
+method give access to the contained messages.
+
+The argumentless form will return a list of C<HTTP::Message> objects.
+If the content type of $msg is not C<multipart/*> or C<message/*> then
+this will return the empty list. In scalar context only the first
+object is returned. The returned message parts should be regarded as
+read-only (future versions of this library might make it possible
+to modify the parent by modifying the parts).
+
+If the content type of $msg is C<message/*> then there will only be
+one part returned.
+
+If the content type is C<message/http>, then the return value will be
+either an C<HTTP::Request> or an C<HTTP::Response> object.
+
+If an @parts argument is given, then the content of the message will be
+modified. The array reference form is provided so that an empty list
+can be provided. The @parts array should contain C<HTTP::Message>
+objects. The @parts objects are owned by $mess after this call and
+should not be modified or made part of other messages.
+
+When updating the message with this method and the old content type of
+$mess is not C<multipart/*> or C<message/*>, then the content type is
+set to C<multipart/mixed> and all other content headers are cleared.
+
+This method will croak if the content type is C<message/*> and more
+than one part is provided.
+
+=item $mess->add_part( $part )
+
+This will add a part to a message. The $part argument should be
+another C<HTTP::Message> object. If the previous content type of
+$mess is not C<multipart/*> then the old content (together with all
+content headers) will be made part #1 and the content type made
+C<multipart/mixed> before the new part is added. The $part object is
+owned by $mess after this call and should not be modified or made part
+of other messages.
+
+There is no return value.
+
+=item $mess->clear
+
+Will clear the headers and set the content to the empty string. There
+is no return value
+
+=item $mess->protocol
+
+=item $mess->protocol( $proto )
+
+Sets the HTTP protocol used for the message. The protocol() is a string
+like C<HTTP/1.0> or C<HTTP/1.1>.
+
+=item $mess->clone
+
+Returns a copy of the message object.
+
+=item $mess->as_string
+
+=item $mess->as_string( $eol )
+
+Returns the message formatted as a single string.
+
+The optional $eol parameter specifies the line ending sequence to use.
+The default is "\n". If no $eol is given then as_string will ensure
+that the returned string is newline terminated (even when the message
+content is not). No extra newline is appended if an explicit $eol is
+passed.
+
+=item $mess->dump( %opt )
+
+Returns the message formatted as a string. In void context print the string.
+
+This differs from C<< $mess->as_string >> in that it escapes the bytes
+of the content so that it's safe to print them and it limits how much
+content to print. The escapes syntax used is the same as for Perl's
+double quoted strings. If there is no content the string "(no
+content)" is shown in its place.
+
+Options to influence the output can be passed as key/value pairs. The
+following options are recognized:
+
+=over
+
+=item maxlength => $num
+
+How much of the content to show. The default is 512. Set this to 0
+for unlimited.
+
+If the content is longer then the string is chopped at the limit and
+the string "...\n(### more bytes not shown)" appended.
+
+=item prefix => $str
+
+A string that will be prefixed to each line of the dump.
+
+=back
+
+=back
+
+All methods unknown to C<HTTP::Message> itself are delegated to the
+C<HTTP::Headers> object that is part of every message. This allows
+convenient access to these methods. Refer to L<HTTP::Headers> for
+details of these methods:
+
+ $mess->header( $field => $val )
+ $mess->push_header( $field => $val )
+ $mess->init_header( $field => $val )
+ $mess->remove_header( $field )
+ $mess->remove_content_headers
+ $mess->header_field_names
+ $mess->scan( \&doit )
+
+ $mess->date
+ $mess->expires
+ $mess->if_modified_since
+ $mess->if_unmodified_since
+ $mess->last_modified
+ $mess->content_type
+ $mess->content_encoding
+ $mess->content_length
+ $mess->content_language
+ $mess->title
+ $mess->user_agent
+ $mess->server
+ $mess->from
+ $mess->referer
+ $mess->www_authenticate
+ $mess->authorization
+ $mess->proxy_authorization
+ $mess->authorization_basic
+ $mess->proxy_authorization_basic
+
+=head1 COPYRIGHT
+
+Copyright 1995-2004 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Negotiate;
+
+$VERSION = "5.835";
+sub Version { $VERSION; }
+
+require 5.002;
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(choose);
+
+require HTTP::Headers;
+
+$DEBUG = 0;
+
+sub choose ($;$)
+{
+ my($variants, $request) = @_;
+ my(%accept);
+
+ unless (defined $request) {
+ # Create a request object from the CGI environment variables
+ $request = HTTP::Headers->new;
+ $request->header('Accept', $ENV{HTTP_ACCEPT})
+ if $ENV{HTTP_ACCEPT};
+ $request->header('Accept-Charset', $ENV{HTTP_ACCEPT_CHARSET})
+ if $ENV{HTTP_ACCEPT_CHARSET};
+ $request->header('Accept-Encoding', $ENV{HTTP_ACCEPT_ENCODING})
+ if $ENV{HTTP_ACCEPT_ENCODING};
+ $request->header('Accept-Language', $ENV{HTTP_ACCEPT_LANGUAGE})
+ if $ENV{HTTP_ACCEPT_LANGUAGE};
+ }
+
+ # Get all Accept values from the request. Build a hash initialized
+ # like this:
+ #
+ # %accept = ( type => { 'audio/*' => { q => 0.2, mbx => 20000 },
+ # 'audio/basic' => { q => 1 },
+ # },
+ # language => { 'no' => { q => 1 },
+ # }
+ # );
+
+ $request->scan(sub {
+ my($key, $val) = @_;
+
+ my $type;
+ if ($key =~ s/^Accept-//) {
+ $type = lc($key);
+ }
+ elsif ($key eq "Accept") {
+ $type = "type";
+ }
+ else {
+ return;
+ }
+
+ $val =~ s/\s+//g;
+ my $default_q = 1;
+ for my $name (split(/,/, $val)) {
+ my(%param, $param);
+ if ($name =~ s/;(.*)//) {
+ for $param (split(/;/, $1)) {
+ my ($pk, $pv) = split(/=/, $param, 2);
+ $param{lc $pk} = $pv;
+ }
+ }
+ $name = lc $name;
+ if (defined $param{'q'}) {
+ $param{'q'} = 1 if $param{'q'} > 1;
+ $param{'q'} = 0 if $param{'q'} < 0;
+ }
+ else {
+ $param{'q'} = $default_q;
+
+ # This makes sure that the first ones are slightly better off
+ # and therefore more likely to be chosen.
+ $default_q -= 0.0001;
+ }
+ $accept{$type}{$name} = \%param;
+ }
+ });
+
+ # Check if any of the variants specify a language. We do this
+ # because it influences how we treat those without (they default to
+ # 0.5 instead of 1).
+ my $any_lang = 0;
+ for $var (@$variants) {
+ if ($var->[5]) {
+ $any_lang = 1;
+ last;
+ }
+ }
+
+ if ($DEBUG) {
+ print "Negotiation parameters in the request\n";
+ for $type (keys %accept) {
+ print " $type:\n";
+ for $name (keys %{$accept{$type}}) {
+ print " $name\n";
+ for $pv (keys %{$accept{$type}{$name}}) {
+ print " $pv = $accept{$type}{$name}{$pv}\n";
+ }
+ }
+ }
+ }
+
+ my @Q = (); # This is where we collect the results of the
+ # quality calculations
+
+ # Calculate quality for all the variants that are available.
+ for (@$variants) {
+ my($id, $qs, $ct, $enc, $cs, $lang, $bs) = @$_;
+ $qs = 1 unless defined $qs;
+ $ct = '' unless defined $ct;
+ $bs = 0 unless defined $bs;
+ $lang = lc($lang) if $lang; # lg tags are always case-insensitive
+ if ($DEBUG) {
+ print "\nEvaluating $id (ct='$ct')\n";
+ printf " qs = %.3f\n", $qs;
+ print " enc = $enc\n" if $enc && !ref($enc);
+ print " enc = @$enc\n" if $enc && ref($enc);
+ print " cs = $cs\n" if $cs;
+ print " lang = $lang\n" if $lang;
+ print " bs = $bs\n" if $bs;
+ }
+
+ # Calculate encoding quality
+ my $qe = 1;
+ # If the variant has no assigned Content-Encoding, or if no
+ # Accept-Encoding field is present, then the value assigned
+ # is "qe=1". If *all* of the variant's content encodings
+ # are listed in the Accept-Encoding field, then the value
+ # assigned is "qw=1". If *any* of the variant's content
+ # encodings are not listed in the provided Accept-Encoding
+ # field, then the value assigned is "qe=0"
+ if (exists $accept{'encoding'} && $enc) {
+ my @enc = ref($enc) ? @$enc : ($enc);
+ for (@enc) {
+ print "Is encoding $_ accepted? " if $DEBUG;
+ unless(exists $accept{'encoding'}{$_}) {
+ print "no\n" if $DEBUG;
+ $qe = 0;
+ last;
+ }
+ else {
+ print "yes\n" if $DEBUG;
+ }
+ }
+ }
+
+ # Calculate charset quality
+ my $qc = 1;
+ # If the variant's media-type has no charset parameter,
+ # or the variant's charset is US-ASCII, or if no Accept-Charset
+ # field is present, then the value assigned is "qc=1". If the
+ # variant's charset is listed in the Accept-Charset field,
+ # then the value assigned is "qc=1. Otherwise, if the variant's
+ # charset is not listed in the provided Accept-Encoding field,
+ # then the value assigned is "qc=0".
+ if (exists $accept{'charset'} && $cs && $cs ne 'us-ascii' ) {
+ $qc = 0 unless $accept{'charset'}{$cs};
+ }
+
+ # Calculate language quality
+ my $ql = 1;
+ if ($lang && exists $accept{'language'}) {
+ my @lang = ref($lang) ? @$lang : ($lang);
+ # If any of the variant's content languages are listed
+ # in the Accept-Language field, the the value assigned is
+ # the largest of the "q" parameter values for those language
+ # tags.
+ my $q = undef;
+ for (@lang) {
+ next unless exists $accept{'language'}{$_};
+ my $this_q = $accept{'language'}{$_}{'q'};
+ $q = $this_q unless defined $q;
+ $q = $this_q if $this_q > $q;
+ }
+ if(defined $q) {
+ $DEBUG and print " -- Exact language match at q=$q\n";
+ }
+ else {
+ # If there was no exact match and at least one of
+ # the Accept-Language field values is a complete
+ # subtag prefix of the content language tag(s), then
+ # the "q" parameter value of the largest matching
+ # prefix is used.
+ $DEBUG and print " -- No exact language match\n";
+ my $selected = undef;
+ for $al (keys %{ $accept{'language'} }) {
+ if (index($al, "$lang-") == 0) {
+ # $lang starting with $al isn't enough, or else
+ # Accept-Language: hu (Hungarian) would seem
+ # to accept a document in hup (Hupa)
+ $DEBUG and print " -- $al ISA $lang\n";
+ $selected = $al unless defined $selected;
+ $selected = $al if length($al) > length($selected);
+ }
+ else {
+ $DEBUG and print " -- $lang isn't a $al\n";
+ }
+ }
+ $q = $accept{'language'}{$selected}{'q'} if $selected;
+
+ # If none of the variant's content language tags or
+ # tag prefixes are listed in the provided
+ # Accept-Language field, then the value assigned
+ # is "ql=0.001"
+ $q = 0.001 unless defined $q;
+ }
+ $ql = $q;
+ }
+ else {
+ $ql = 0.5 if $any_lang && exists $accept{'language'};
+ }
+
+ my $q = 1;
+ my $mbx = undef;
+ # If no Accept field is given, then the value assigned is "q=1".
+ # If at least one listed media range matches the variant's media
+ # type, then the "q" parameter value assigned to the most specific
+ # of those matched is used (e.g. "text/html;version=3.0" is more
+ # specific than "text/html", which is more specific than "text/*",
+ # which in turn is more specific than "*/*"). If not media range
+ # in the provided Accept field matches the variant's media type,
+ # then the value assigned is "q=0".
+ if (exists $accept{'type'} && $ct) {
+ # First we clean up our content-type
+ $ct =~ s/\s+//g;
+ my $params = "";
+ $params = $1 if $ct =~ s/;(.*)//;
+ my($type, $subtype) = split("/", $ct, 2);
+ my %param = ();
+ for $param (split(/;/, $params)) {
+ my($pk,$pv) = split(/=/, $param, 2);
+ $param{$pk} = $pv;
+ }
+
+ my $sel_q = undef;
+ my $sel_mbx = undef;
+ my $sel_specificness = 0;
+
+ ACCEPT_TYPE:
+ for $at (keys %{ $accept{'type'} }) {
+ print "Consider $at...\n" if $DEBUG;
+ my($at_type, $at_subtype) = split("/", $at, 2);
+ # Is it a match on the type
+ next if $at_type ne '*' && $at_type ne $type;
+ next if $at_subtype ne '*' && $at_subtype ne $subtype;
+ my $specificness = 0;
+ $specificness++ if $at_type ne '*';
+ $specificness++ if $at_subtype ne '*';
+ # Let's see if content-type parameters also match
+ while (($pk, $pv) = each %param) {
+ print "Check if $pk = $pv is true\n" if $DEBUG;
+ next unless exists $accept{'type'}{$at}{$pk};
+ next ACCEPT_TYPE
+ unless $accept{'type'}{$at}{$pk} eq $pv;
+ print "yes it is!!\n" if $DEBUG;
+ $specificness++;
+ }
+ print "Hurray, type match with specificness = $specificness\n"
+ if $DEBUG;
+
+ if (!defined($sel_q) || $sel_specificness < $specificness) {
+ $sel_q = $accept{'type'}{$at}{'q'};
+ $sel_mbx = $accept{'type'}{$at}{'mbx'};
+ $sel_specificness = $specificness;
+ }
+ }
+ $q = $sel_q || 0;
+ $mbx = $sel_mbx;
+ }
+
+ my $Q;
+ if (!defined($mbx) || $mbx >= $bs) {
+ $Q = $qs * $qe * $qc * $ql * $q;
+ }
+ else {
+ $Q = 0;
+ print "Variant's size is too large ==> Q=0\n" if $DEBUG;
+ }
+
+ if ($DEBUG) {
+ $mbx = "undef" unless defined $mbx;
+ printf "Q=%.4f", $Q;
+ print " (q=$q, mbx=$mbx, qe=$qe, qc=$qc, ql=$ql, qs=$qs)\n";
+ }
+
+ push(@Q, [$id, $Q, $bs]);
+ }
+
+
+ @Q = sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @Q;
+
+ return @Q if wantarray;
+ return undef unless @Q;
+ return undef if $Q[0][1] == 0;
+ $Q[0][0];
+}
+
+1;
+
+__END__
+
+
+=head1 NAME
+
+HTTP::Negotiate - choose a variant to serve
+
+=head1 SYNOPSIS
+
+ use HTTP::Negotiate qw(choose);
+
+ # ID QS Content-Type Encoding Char-Set Lang Size
+ $variants =
+ [['var1', 1.000, 'text/html', undef, 'iso-8859-1', 'en', 3000],
+ ['var2', 0.950, 'text/plain', 'gzip', 'us-ascii', 'no', 400],
+ ['var3', 0.3, 'image/gif', undef, undef, undef, 43555],
+ ];
+
+ @preferred = choose($variants, $request_headers);
+ $the_one = choose($variants);
+
+=head1 DESCRIPTION
+
+This module provides a complete implementation of the HTTP content
+negotiation algorithm specified in F<draft-ietf-http-v11-spec-00.ps>
+chapter 12. Content negotiation allows for the selection of a
+preferred content representation based upon attributes of the
+negotiable variants and the value of the various Accept* header fields
+in the request.
+
+The variants are ordered by preference by calling the function
+choose().
+
+The first parameter is reference to an array of the variants to
+choose among.
+Each element in this array is an array with the values [$id, $qs,
+$content_type, $content_encoding, $charset, $content_language,
+$content_length] whose meanings are described
+below. The $content_encoding and $content_language can be either a
+single scalar value or an array reference if there are several values.
+
+The second optional parameter is either a HTTP::Headers or a HTTP::Request
+object which is searched for "Accept*" headers. If this
+parameter is missing, then the accept specification is initialized
+from the CGI environment variables HTTP_ACCEPT, HTTP_ACCEPT_CHARSET,
+HTTP_ACCEPT_ENCODING and HTTP_ACCEPT_LANGUAGE.
+
+In an array context, choose() returns a list of [variant
+identifier, calculated quality, size] tuples. The values are sorted by
+quality, highest quality first. If the calculated quality is the same
+for two variants, then they are sorted by size (smallest first). I<E.g.>:
+
+ (['var1', 1, 2000], ['var2', 0.3, 512], ['var3', 0.3, 1024]);
+
+Note that also zero quality variants are included in the return list
+even if these should never be served to the client.
+
+In a scalar context, it returns the identifier of the variant with the
+highest score or C<undef> if none have non-zero quality.
+
+If the $HTTP::Negotiate::DEBUG variable is set to TRUE, then a lot of
+noise is generated on STDOUT during evaluation of choose().
+
+=head1 VARIANTS
+
+A variant is described by a list of the following values. If the
+attribute does not make sense or is unknown for a variant, then use
+C<undef> instead.
+
+=over 3
+
+=item identifier
+
+This is a string that you use as the name for the variant. This
+identifier for the preferred variants returned by choose().
+
+=item qs
+
+This is a number between 0.000 and 1.000 that describes the "source
+quality". This is what F<draft-ietf-http-v11-spec-00.ps> says about this
+value:
+
+Source quality is measured by the content provider as representing the
+amount of degradation from the original source. For example, a
+picture in JPEG form would have a lower qs when translated to the XBM
+format, and much lower qs when translated to an ASCII-art
+representation. Note, however, that this is a function of the source
+- an original piece of ASCII-art may degrade in quality if it is
+captured in JPEG form. The qs values should be assigned to each
+variant by the content provider; if no qs value has been assigned, the
+default is generally "qs=1".
+
+=item content-type
+
+This is the media type of the variant. The media type does not
+include a charset attribute, but might contain other parameters.
+Examples are:
+
+ text/html
+ text/html;version=2.0
+ text/plain
+ image/gif
+ image/jpg
+
+=item content-encoding
+
+This is one or more content encodings that has been applied to the
+variant. The content encoding is generally used as a modifier to the
+content media type. The most common content encodings are:
+
+ gzip
+ compress
+
+=item content-charset
+
+This is the character set used when the variant contains text.
+The charset value should generally be C<undef> or one of these:
+
+ us-ascii
+ iso-8859-1 ... iso-8859-9
+ iso-2022-jp
+ iso-2022-jp-2
+ iso-2022-kr
+ unicode-1-1
+ unicode-1-1-utf-7
+ unicode-1-1-utf-8
+
+=item content-language
+
+This describes one or more languages that are used in the variant.
+Language is described like this in F<draft-ietf-http-v11-spec-00.ps>: A
+language is in this context a natural language spoken, written, or
+otherwise conveyed by human beings for communication of information to
+other human beings. Computer languages are explicitly excluded.
+
+The language tags are defined by RFC 3066. Examples
+are:
+
+ no Norwegian
+ en International English
+ en-US US English
+ en-cockney
+
+=item content-length
+
+This is the number of bytes used to represent the content.
+
+=back
+
+=head1 ACCEPT HEADERS
+
+The following Accept* headers can be used for describing content
+preferences in a request (This description is an edited extract from
+F<draft-ietf-http-v11-spec-00.ps>):
+
+=over 3
+
+=item Accept
+
+This header can be used to indicate a list of media ranges which are
+acceptable as a response to the request. The "*" character is used to
+group media types into ranges, with "*/*" indicating all media types
+and "type/*" indicating all subtypes of that type.
+
+The parameter q is used to indicate the quality factor, which
+represents the user's preference for that range of media types. The
+parameter mbx gives the maximum acceptable size of the response
+content. The default values are: q=1 and mbx=infinity. If no Accept
+header is present, then the client accepts all media types with q=1.
+
+For example:
+
+ Accept: audio/*;q=0.2;mbx=200000, audio/basic
+
+would mean: "I prefer audio/basic (of any size), but send me any audio
+type if it is the best available after an 80% mark-down in quality and
+its size is less than 200000 bytes"
+
+
+=item Accept-Charset
+
+Used to indicate what character sets are acceptable for the response.
+The "us-ascii" character set is assumed to be acceptable for all user
+agents. If no Accept-Charset field is given, the default is that any
+charset is acceptable. Example:
+
+ Accept-Charset: iso-8859-1, unicode-1-1
+
+
+=item Accept-Encoding
+
+Restricts the Content-Encoding values which are acceptable in the
+response. If no Accept-Encoding field is present, the server may
+assume that the client will accept any content encoding. An empty
+Accept-Encoding means that no content encoding is acceptable. Example:
+
+ Accept-Encoding: compress, gzip
+
+
+=item Accept-Language
+
+This field is similar to Accept, but restricts the set of natural
+languages that are preferred in a response. Each language may be
+given an associated quality value which represents an estimate of the
+user's comprehension of that language. For example:
+
+ Accept-Language: no, en-gb;q=0.8, de;q=0.55
+
+would mean: "I prefer Norwegian, but will accept British English (with
+80% comprehension) or German (with 55% comprehension).
+
+=back
+
+
+=head1 COPYRIGHT
+
+Copyright 1996,2001 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Gisle Aas <gisle@aas.no>
+
+=cut
--- /dev/null
+package HTTP::Request;
+
+require HTTP::Message;
+@ISA = qw(HTTP::Message);
+$VERSION = "5.827";
+
+use strict;
+
+
+
+sub new
+{
+ my($class, $method, $uri, $header, $content) = @_;
+ my $self = $class->SUPER::new($header, $content);
+ $self->method($method);
+ $self->uri($uri);
+ $self;
+}
+
+
+sub parse
+{
+ my($class, $str) = @_;
+ my $request_line;
+ if ($str =~ s/^(.*)\n//) {
+ $request_line = $1;
+ }
+ else {
+ $request_line = $str;
+ $str = "";
+ }
+
+ my $self = $class->SUPER::parse($str);
+ my($method, $uri, $protocol) = split(' ', $request_line);
+ $self->method($method) if defined($method);
+ $self->uri($uri) if defined($uri);
+ $self->protocol($protocol) if $protocol;
+ $self;
+}
+
+
+sub clone
+{
+ my $self = shift;
+ my $clone = bless $self->SUPER::clone, ref($self);
+ $clone->method($self->method);
+ $clone->uri($self->uri);
+ $clone;
+}
+
+
+sub method
+{
+ shift->_elem('_method', @_);
+}
+
+
+sub uri
+{
+ my $self = shift;
+ my $old = $self->{'_uri'};
+ if (@_) {
+ my $uri = shift;
+ if (!defined $uri) {
+ # that's ok
+ }
+ elsif (ref $uri) {
+ Carp::croak("A URI can't be a " . ref($uri) . " reference")
+ if ref($uri) eq 'HASH' or ref($uri) eq 'ARRAY';
+ Carp::croak("Can't use a " . ref($uri) . " object as a URI")
+ unless $uri->can('scheme');
+ $uri = $uri->clone;
+ unless ($HTTP::URI_CLASS eq "URI") {
+ # Argh!! Hate this... old LWP legacy!
+ eval { local $SIG{__DIE__}; $uri = $uri->abs; };
+ die $@ if $@ && $@ !~ /Missing base argument/;
+ }
+ }
+ else {
+ $uri = $HTTP::URI_CLASS->new($uri);
+ }
+ $self->{'_uri'} = $uri;
+ delete $self->{'_uri_canonical'};
+ }
+ $old;
+}
+
+*url = \&uri; # legacy
+
+sub uri_canonical
+{
+ my $self = shift;
+ return $self->{'_uri_canonical'} ||= $self->{'_uri'}->canonical;
+}
+
+
+sub accept_decodable
+{
+ my $self = shift;
+ $self->header("Accept-Encoding", scalar($self->decodable));
+}
+
+sub as_string
+{
+ my $self = shift;
+ my($eol) = @_;
+ $eol = "\n" unless defined $eol;
+
+ my $req_line = $self->method || "-";
+ my $uri = $self->uri;
+ $uri = (defined $uri) ? $uri->as_string : "-";
+ $req_line .= " $uri";
+ my $proto = $self->protocol;
+ $req_line .= " $proto" if $proto;
+
+ return join($eol, $req_line, $self->SUPER::as_string(@_));
+}
+
+sub dump
+{
+ my $self = shift;
+ my @pre = ($self->method || "-", $self->uri || "-");
+ if (my $prot = $self->protocol) {
+ push(@pre, $prot);
+ }
+
+ return $self->SUPER::dump(
+ preheader => join(" ", @pre),
+ @_,
+ );
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Request - HTTP style request message
+
+=head1 SYNOPSIS
+
+ require HTTP::Request;
+ $request = HTTP::Request->new(GET => 'http://www.example.com/');
+
+and usually used like this:
+
+ $ua = LWP::UserAgent->new;
+ $response = $ua->request($request);
+
+=head1 DESCRIPTION
+
+C<HTTP::Request> is a class encapsulating HTTP style requests,
+consisting of a request line, some headers, and a content body. Note
+that the LWP library uses HTTP style requests even for non-HTTP
+protocols. Instances of this class are usually passed to the
+request() method of an C<LWP::UserAgent> object.
+
+C<HTTP::Request> is a subclass of C<HTTP::Message> and therefore
+inherits its methods. The following additional methods are available:
+
+=over 4
+
+=item $r = HTTP::Request->new( $method, $uri )
+
+=item $r = HTTP::Request->new( $method, $uri, $header )
+
+=item $r = HTTP::Request->new( $method, $uri, $header, $content )
+
+Constructs a new C<HTTP::Request> object describing a request on the
+object $uri using method $method. The $method argument must be a
+string. The $uri argument can be either a string, or a reference to a
+C<URI> object. The optional $header argument should be a reference to
+an C<HTTP::Headers> object or a plain array reference of key/value
+pairs. The optional $content argument should be a string of bytes.
+
+=item $r = HTTP::Request->parse( $str )
+
+This constructs a new request object by parsing the given string.
+
+=item $r->method
+
+=item $r->method( $val )
+
+This is used to get/set the method attribute. The method should be a
+short string like "GET", "HEAD", "PUT" or "POST".
+
+=item $r->uri
+
+=item $r->uri( $val )
+
+This is used to get/set the uri attribute. The $val can be a
+reference to a URI object or a plain string. If a string is given,
+then it should be parseable as an absolute URI.
+
+=item $r->header( $field )
+
+=item $r->header( $field => $value )
+
+This is used to get/set header values and it is inherited from
+C<HTTP::Headers> via C<HTTP::Message>. See L<HTTP::Headers> for
+details and other similar methods that can be used to access the
+headers.
+
+=item $r->accept_decodable
+
+This will set the C<Accept-Encoding> header to the list of encodings
+that decoded_content() can decode.
+
+=item $r->content
+
+=item $r->content( $bytes )
+
+This is used to get/set the content and it is inherited from the
+C<HTTP::Message> base class. See L<HTTP::Message> for details and
+other methods that can be used to access the content.
+
+Note that the content should be a string of bytes. Strings in perl
+can contain characters outside the range of a byte. The C<Encode>
+module can be used to turn such strings into a string of bytes.
+
+=item $r->as_string
+
+=item $r->as_string( $eol )
+
+Method returning a textual representation of the request.
+
+=back
+
+=head1 SEE ALSO
+
+L<HTTP::Headers>, L<HTTP::Message>, L<HTTP::Request::Common>,
+L<HTTP::Response>
+
+=head1 COPYRIGHT
+
+Copyright 1995-2004 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Request::Common;
+
+use strict;
+use vars qw(@EXPORT @EXPORT_OK $VERSION $DYNAMIC_FILE_UPLOAD);
+
+$DYNAMIC_FILE_UPLOAD ||= 0; # make it defined (don't know why)
+
+require Exporter;
+*import = \&Exporter::import;
+@EXPORT =qw(GET HEAD PUT POST);
+@EXPORT_OK = qw($DYNAMIC_FILE_UPLOAD DELETE);
+
+require HTTP::Request;
+use Carp();
+
+$VERSION = "5.824";
+
+my $CRLF = "\015\012"; # "\r\n" is not portable
+
+sub GET { _simple_req('GET', @_); }
+sub HEAD { _simple_req('HEAD', @_); }
+sub PUT { _simple_req('PUT' , @_); }
+sub DELETE { _simple_req('DELETE', @_); }
+
+sub POST
+{
+ my $url = shift;
+ my $req = HTTP::Request->new(POST => $url);
+ my $content;
+ $content = shift if @_ and ref $_[0];
+ my($k, $v);
+ while (($k,$v) = splice(@_, 0, 2)) {
+ if (lc($k) eq 'content') {
+ $content = $v;
+ }
+ else {
+ $req->push_header($k, $v);
+ }
+ }
+ my $ct = $req->header('Content-Type');
+ unless ($ct) {
+ $ct = 'application/x-www-form-urlencoded';
+ }
+ elsif ($ct eq 'form-data') {
+ $ct = 'multipart/form-data';
+ }
+
+ if (ref $content) {
+ if ($ct =~ m,^multipart/form-data\s*(;|$),i) {
+ require HTTP::Headers::Util;
+ my @v = HTTP::Headers::Util::split_header_words($ct);
+ Carp::carp("Multiple Content-Type headers") if @v > 1;
+ @v = @{$v[0]};
+
+ my $boundary;
+ my $boundary_index;
+ for (my @tmp = @v; @tmp;) {
+ my($k, $v) = splice(@tmp, 0, 2);
+ if ($k eq "boundary") {
+ $boundary = $v;
+ $boundary_index = @v - @tmp - 1;
+ last;
+ }
+ }
+
+ ($content, $boundary) = form_data($content, $boundary, $req);
+
+ if ($boundary_index) {
+ $v[$boundary_index] = $boundary;
+ }
+ else {
+ push(@v, boundary => $boundary);
+ }
+
+ $ct = HTTP::Headers::Util::join_header_words(@v);
+ }
+ else {
+ # We use a temporary URI object to format
+ # the application/x-www-form-urlencoded content.
+ require URI;
+ my $url = URI->new('http:');
+ $url->query_form(ref($content) eq "HASH" ? %$content : @$content);
+ $content = $url->query;
+ }
+ }
+
+ $req->header('Content-Type' => $ct); # might be redundant
+ if (defined($content)) {
+ $req->header('Content-Length' =>
+ length($content)) unless ref($content);
+ $req->content($content);
+ }
+ else {
+ $req->header('Content-Length' => 0);
+ }
+ $req;
+}
+
+
+sub _simple_req
+{
+ my($method, $url) = splice(@_, 0, 2);
+ my $req = HTTP::Request->new($method => $url);
+ my($k, $v);
+ my $content;
+ while (($k,$v) = splice(@_, 0, 2)) {
+ if (lc($k) eq 'content') {
+ $req->add_content($v);
+ $content++;
+ }
+ else {
+ $req->push_header($k, $v);
+ }
+ }
+ if ($content && !defined($req->header("Content-Length"))) {
+ $req->header("Content-Length", length(${$req->content_ref}));
+ }
+ $req;
+}
+
+
+sub form_data # RFC1867
+{
+ my($data, $boundary, $req) = @_;
+ my @data = ref($data) eq "HASH" ? %$data : @$data; # copy
+ my $fhparts;
+ my @parts;
+ my($k,$v);
+ while (($k,$v) = splice(@data, 0, 2)) {
+ if (!ref($v)) {
+ $k =~ s/([\\\"])/\\$1/g; # escape quotes and backslashes
+ push(@parts,
+ qq(Content-Disposition: form-data; name="$k"$CRLF$CRLF$v));
+ }
+ else {
+ my($file, $usename, @headers) = @$v;
+ unless (defined $usename) {
+ $usename = $file;
+ $usename =~ s,.*/,, if defined($usename);
+ }
+ $k =~ s/([\\\"])/\\$1/g;
+ my $disp = qq(form-data; name="$k");
+ if (defined($usename) and length($usename)) {
+ $usename =~ s/([\\\"])/\\$1/g;
+ $disp .= qq(; filename="$usename");
+ }
+ my $content = "";
+ my $h = HTTP::Headers->new(@headers);
+ if ($file) {
+ open(my $fh, "<", $file) or Carp::croak("Can't open file $file: $!");
+ binmode($fh);
+ if ($DYNAMIC_FILE_UPLOAD) {
+ # will read file later, close it now in order to
+ # not accumulate to many open file handles
+ close($fh);
+ $content = \$file;
+ }
+ else {
+ local($/) = undef; # slurp files
+ $content = <$fh>;
+ close($fh);
+ }
+ unless ($h->header("Content-Type")) {
+ require LWP::MediaTypes;
+ LWP::MediaTypes::guess_media_type($file, $h);
+ }
+ }
+ if ($h->header("Content-Disposition")) {
+ # just to get it sorted first
+ $disp = $h->header("Content-Disposition");
+ $h->remove_header("Content-Disposition");
+ }
+ if ($h->header("Content")) {
+ $content = $h->header("Content");
+ $h->remove_header("Content");
+ }
+ my $head = join($CRLF, "Content-Disposition: $disp",
+ $h->as_string($CRLF),
+ "");
+ if (ref $content) {
+ push(@parts, [$head, $$content]);
+ $fhparts++;
+ }
+ else {
+ push(@parts, $head . $content);
+ }
+ }
+ }
+ return ("", "none") unless @parts;
+
+ my $content;
+ if ($fhparts) {
+ $boundary = boundary(10) # hopefully enough randomness
+ unless $boundary;
+
+ # add the boundaries to the @parts array
+ for (1..@parts-1) {
+ splice(@parts, $_*2-1, 0, "$CRLF--$boundary$CRLF");
+ }
+ unshift(@parts, "--$boundary$CRLF");
+ push(@parts, "$CRLF--$boundary--$CRLF");
+
+ # See if we can generate Content-Length header
+ my $length = 0;
+ for (@parts) {
+ if (ref $_) {
+ my ($head, $f) = @$_;
+ my $file_size;
+ unless ( -f $f && ($file_size = -s _) ) {
+ # The file is either a dynamic file like /dev/audio
+ # or perhaps a file in the /proc file system where
+ # stat may return a 0 size even though reading it
+ # will produce data. So we cannot make
+ # a Content-Length header.
+ undef $length;
+ last;
+ }
+ $length += $file_size + length $head;
+ }
+ else {
+ $length += length;
+ }
+ }
+ $length && $req->header('Content-Length' => $length);
+
+ # set up a closure that will return content piecemeal
+ $content = sub {
+ for (;;) {
+ unless (@parts) {
+ defined $length && $length != 0 &&
+ Carp::croak "length of data sent did not match calculated Content-Length header. Probably because uploaded file changed in size during transfer.";
+ return;
+ }
+ my $p = shift @parts;
+ unless (ref $p) {
+ $p .= shift @parts while @parts && !ref($parts[0]);
+ defined $length && ($length -= length $p);
+ return $p;
+ }
+ my($buf, $fh) = @$p;
+ unless (ref($fh)) {
+ my $file = $fh;
+ undef($fh);
+ open($fh, "<", $file) || Carp::croak("Can't open file $file: $!");
+ binmode($fh);
+ }
+ my $buflength = length $buf;
+ my $n = read($fh, $buf, 2048, $buflength);
+ if ($n) {
+ $buflength += $n;
+ unshift(@parts, ["", $fh]);
+ }
+ else {
+ close($fh);
+ }
+ if ($buflength) {
+ defined $length && ($length -= $buflength);
+ return $buf
+ }
+ }
+ };
+
+ }
+ else {
+ $boundary = boundary() unless $boundary;
+
+ my $bno = 0;
+ CHECK_BOUNDARY:
+ {
+ for (@parts) {
+ if (index($_, $boundary) >= 0) {
+ # must have a better boundary
+ $boundary = boundary(++$bno);
+ redo CHECK_BOUNDARY;
+ }
+ }
+ last;
+ }
+ $content = "--$boundary$CRLF" .
+ join("$CRLF--$boundary$CRLF", @parts) .
+ "$CRLF--$boundary--$CRLF";
+ }
+
+ wantarray ? ($content, $boundary) : $content;
+}
+
+
+sub boundary
+{
+ my $size = shift || return "xYzZY";
+ require MIME::Base64;
+ my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), "");
+ $b =~ s/[\W]/X/g; # ensure alnum only
+ $b;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Request::Common - Construct common HTTP::Request objects
+
+=head1 SYNOPSIS
+
+ use HTTP::Request::Common;
+ $ua = LWP::UserAgent->new;
+ $ua->request(GET 'http://www.sn.no/');
+ $ua->request(POST 'http://somewhere/foo', [foo => bar, bar => foo]);
+
+=head1 DESCRIPTION
+
+This module provide functions that return newly created C<HTTP::Request>
+objects. These functions are usually more convenient to use than the
+standard C<HTTP::Request> constructor for the most common requests. The
+following functions are provided:
+
+=over 4
+
+=item GET $url
+
+=item GET $url, Header => Value,...
+
+The GET() function returns an C<HTTP::Request> object initialized with
+the "GET" method and the specified URL. It is roughly equivalent to the
+following call
+
+ HTTP::Request->new(
+ GET => $url,
+ HTTP::Headers->new(Header => Value,...),
+ )
+
+but is less cluttered. What is different is that a header named
+C<Content> will initialize the content part of the request instead of
+setting a header field. Note that GET requests should normally not
+have a content, so this hack makes more sense for the PUT() and POST()
+functions described below.
+
+The get(...) method of C<LWP::UserAgent> exists as a shortcut for
+$ua->request(GET ...).
+
+=item HEAD $url
+
+=item HEAD $url, Header => Value,...
+
+Like GET() but the method in the request is "HEAD".
+
+The head(...) method of "LWP::UserAgent" exists as a shortcut for
+$ua->request(HEAD ...).
+
+=item PUT $url
+
+=item PUT $url, Header => Value,...
+
+=item PUT $url, Header => Value,..., Content => $content
+
+Like GET() but the method in the request is "PUT".
+
+The content of the request can be specified using the "Content"
+pseudo-header. This steals a bit of the header field namespace as
+there is no way to directly specify a header that is actually called
+"Content". If you really need this you must update the request
+returned in a separate statement.
+
+=item DELETE $url
+
+=item DELETE $url, Header => Value,...
+
+Like GET() but the method in the request is "DELETE". This function
+is not exported by default.
+
+=item POST $url
+
+=item POST $url, Header => Value,...
+
+=item POST $url, $form_ref, Header => Value,...
+
+=item POST $url, Header => Value,..., Content => $form_ref
+
+=item POST $url, Header => Value,..., Content => $content
+
+This works mostly like PUT() with "POST" as the method, but this
+function also takes a second optional array or hash reference
+parameter $form_ref. As for PUT() the content can also be specified
+directly using the "Content" pseudo-header, and you may also provide
+the $form_ref this way.
+
+The $form_ref argument can be used to pass key/value pairs for the
+form content. By default we will initialize a request using the
+C<application/x-www-form-urlencoded> content type. This means that
+you can emulate a HTML E<lt>form> POSTing like this:
+
+ POST 'http://www.perl.org/survey.cgi',
+ [ name => 'Gisle Aas',
+ email => 'gisle@aas.no',
+ gender => 'M',
+ born => '1964',
+ perc => '3%',
+ ];
+
+This will create a HTTP::Request object that looks like this:
+
+ POST http://www.perl.org/survey.cgi
+ Content-Length: 66
+ Content-Type: application/x-www-form-urlencoded
+
+ name=Gisle%20Aas&email=gisle%40aas.no&gender=M&born=1964&perc=3%25
+
+Multivalued form fields can be specified by either repeating the field
+name or by passing the value as an array reference.
+
+The POST method also supports the C<multipart/form-data> content used
+for I<Form-based File Upload> as specified in RFC 1867. You trigger
+this content format by specifying a content type of C<'form-data'> as
+one of the request headers. If one of the values in the $form_ref is
+an array reference, then it is treated as a file part specification
+with the following interpretation:
+
+ [ $file, $filename, Header => Value... ]
+ [ undef, $filename, Header => Value,..., Content => $content ]
+
+The first value in the array ($file) is the name of a file to open.
+This file will be read and its content placed in the request. The
+routine will croak if the file can't be opened. Use an C<undef> as
+$file value if you want to specify the content directly with a
+C<Content> header. The $filename is the filename to report in the
+request. If this value is undefined, then the basename of the $file
+will be used. You can specify an empty string as $filename if you
+want to suppress sending the filename when you provide a $file value.
+
+If a $file is provided by no C<Content-Type> header, then C<Content-Type>
+and C<Content-Encoding> will be filled in automatically with the values
+returned by LWP::MediaTypes::guess_media_type()
+
+Sending my F<~/.profile> to the survey used as example above can be
+achieved by this:
+
+ POST 'http://www.perl.org/survey.cgi',
+ Content_Type => 'form-data',
+ Content => [ name => 'Gisle Aas',
+ email => 'gisle@aas.no',
+ gender => 'M',
+ born => '1964',
+ init => ["$ENV{HOME}/.profile"],
+ ]
+
+This will create a HTTP::Request object that almost looks this (the
+boundary and the content of your F<~/.profile> is likely to be
+different):
+
+ POST http://www.perl.org/survey.cgi
+ Content-Length: 388
+ Content-Type: multipart/form-data; boundary="6G+f"
+
+ --6G+f
+ Content-Disposition: form-data; name="name"
+
+ Gisle Aas
+ --6G+f
+ Content-Disposition: form-data; name="email"
+
+ gisle@aas.no
+ --6G+f
+ Content-Disposition: form-data; name="gender"
+
+ M
+ --6G+f
+ Content-Disposition: form-data; name="born"
+
+ 1964
+ --6G+f
+ Content-Disposition: form-data; name="init"; filename=".profile"
+ Content-Type: text/plain
+
+ PATH=/local/perl/bin:$PATH
+ export PATH
+
+ --6G+f--
+
+If you set the $DYNAMIC_FILE_UPLOAD variable (exportable) to some TRUE
+value, then you get back a request object with a subroutine closure as
+the content attribute. This subroutine will read the content of any
+files on demand and return it in suitable chunks. This allow you to
+upload arbitrary big files without using lots of memory. You can even
+upload infinite files like F</dev/audio> if you wish; however, if
+the file is not a plain file, there will be no Content-Length header
+defined for the request. Not all servers (or server
+applications) like this. Also, if the file(s) change in size between
+the time the Content-Length is calculated and the time that the last
+chunk is delivered, the subroutine will C<Croak>.
+
+The post(...) method of "LWP::UserAgent" exists as a shortcut for
+$ua->request(POST ...).
+
+=back
+
+=head1 SEE ALSO
+
+L<HTTP::Request>, L<LWP::UserAgent>
+
+
+=head1 COPYRIGHT
+
+Copyright 1997-2004, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
+
--- /dev/null
+package HTTP::Response;
+
+require HTTP::Message;
+@ISA = qw(HTTP::Message);
+$VERSION = "5.836";
+
+use strict;
+use HTTP::Status ();
+
+
+
+sub new
+{
+ my($class, $rc, $msg, $header, $content) = @_;
+ my $self = $class->SUPER::new($header, $content);
+ $self->code($rc);
+ $self->message($msg);
+ $self;
+}
+
+
+sub parse
+{
+ my($class, $str) = @_;
+ my $status_line;
+ if ($str =~ s/^(.*)\n//) {
+ $status_line = $1;
+ }
+ else {
+ $status_line = $str;
+ $str = "";
+ }
+
+ my $self = $class->SUPER::parse($str);
+ my($protocol, $code, $message);
+ if ($status_line =~ /^\d{3} /) {
+ # Looks like a response created by HTTP::Response->new
+ ($code, $message) = split(' ', $status_line, 2);
+ } else {
+ ($protocol, $code, $message) = split(' ', $status_line, 3);
+ }
+ $self->protocol($protocol) if $protocol;
+ $self->code($code) if defined($code);
+ $self->message($message) if defined($message);
+ $self;
+}
+
+
+sub clone
+{
+ my $self = shift;
+ my $clone = bless $self->SUPER::clone, ref($self);
+ $clone->code($self->code);
+ $clone->message($self->message);
+ $clone->request($self->request->clone) if $self->request;
+ # we don't clone previous
+ $clone;
+}
+
+
+sub code { shift->_elem('_rc', @_); }
+sub message { shift->_elem('_msg', @_); }
+sub previous { shift->_elem('_previous',@_); }
+sub request { shift->_elem('_request', @_); }
+
+
+sub status_line
+{
+ my $self = shift;
+ my $code = $self->{'_rc'} || "000";
+ my $mess = $self->{'_msg'} || HTTP::Status::status_message($code) || "Unknown code";
+ return "$code $mess";
+}
+
+
+sub base
+{
+ my $self = shift;
+ my $base = (
+ $self->header('Content-Base'), # used to be HTTP/1.1
+ $self->header('Content-Location'), # HTTP/1.1
+ $self->header('Base'), # HTTP/1.0
+ )[0];
+ if ($base && $base =~ /^$URI::scheme_re:/o) {
+ # already absolute
+ return $HTTP::URI_CLASS->new($base);
+ }
+
+ my $req = $self->request;
+ if ($req) {
+ # if $base is undef here, the return value is effectively
+ # just a copy of $self->request->uri.
+ return $HTTP::URI_CLASS->new_abs($base, $req->uri);
+ }
+
+ # can't find an absolute base
+ return undef;
+}
+
+
+sub redirects {
+ my $self = shift;
+ my @r;
+ my $r = $self;
+ while (my $p = $r->previous) {
+ push(@r, $p);
+ $r = $p;
+ }
+ return @r unless wantarray;
+ return reverse @r;
+}
+
+
+sub filename
+{
+ my $self = shift;
+ my $file;
+
+ my $cd = $self->header('Content-Disposition');
+ if ($cd) {
+ require HTTP::Headers::Util;
+ if (my @cd = HTTP::Headers::Util::split_header_words($cd)) {
+ my ($disposition, undef, %cd_param) = @{$cd[-1]};
+ $file = $cd_param{filename};
+
+ # RFC 2047 encoded?
+ if ($file && $file =~ /^=\?(.+?)\?(.+?)\?(.+)\?=$/) {
+ my $charset = $1;
+ my $encoding = uc($2);
+ my $encfile = $3;
+
+ if ($encoding eq 'Q' || $encoding eq 'B') {
+ local($SIG{__DIE__});
+ eval {
+ if ($encoding eq 'Q') {
+ $encfile =~ s/_/ /g;
+ require MIME::QuotedPrint;
+ $encfile = MIME::QuotedPrint::decode($encfile);
+ }
+ else { # $encoding eq 'B'
+ require MIME::Base64;
+ $encfile = MIME::Base64::decode($encfile);
+ }
+
+ require Encode;
+ require encoding;
+ # This is ugly use of non-public API, but is there
+ # a better way to accomplish what we want (locally
+ # as-is usable filename string)?
+ my $locale_charset = encoding::_get_locale_encoding();
+ Encode::from_to($encfile, $charset, $locale_charset);
+ };
+
+ $file = $encfile unless $@;
+ }
+ }
+ }
+ }
+
+ unless (defined($file) && length($file)) {
+ my $uri;
+ if (my $cl = $self->header('Content-Location')) {
+ $uri = URI->new($cl);
+ }
+ elsif (my $request = $self->request) {
+ $uri = $request->uri;
+ }
+
+ if ($uri) {
+ $file = ($uri->path_segments)[-1];
+ }
+ }
+
+ if ($file) {
+ $file =~ s,.*[\\/],,; # basename
+ }
+
+ if ($file && !length($file)) {
+ $file = undef;
+ }
+
+ $file;
+}
+
+
+sub as_string
+{
+ require HTTP::Status;
+ my $self = shift;
+ my($eol) = @_;
+ $eol = "\n" unless defined $eol;
+
+ my $status_line = $self->status_line;
+ my $proto = $self->protocol;
+ $status_line = "$proto $status_line" if $proto;
+
+ return join($eol, $status_line, $self->SUPER::as_string(@_));
+}
+
+
+sub dump
+{
+ my $self = shift;
+
+ my $status_line = $self->status_line;
+ my $proto = $self->protocol;
+ $status_line = "$proto $status_line" if $proto;
+
+ return $self->SUPER::dump(
+ preheader => $status_line,
+ @_,
+ );
+}
+
+
+sub is_info { HTTP::Status::is_info (shift->{'_rc'}); }
+sub is_success { HTTP::Status::is_success (shift->{'_rc'}); }
+sub is_redirect { HTTP::Status::is_redirect (shift->{'_rc'}); }
+sub is_error { HTTP::Status::is_error (shift->{'_rc'}); }
+
+
+sub error_as_HTML
+{
+ require HTML::Entities;
+ my $self = shift;
+ my $title = 'An Error Occurred';
+ my $body = HTML::Entities::encode($self->status_line);
+ return <<EOM;
+<html>
+<head><title>$title</title></head>
+<body>
+<h1>$title</h1>
+<p>$body</p>
+</body>
+</html>
+EOM
+}
+
+
+sub current_age
+{
+ my $self = shift;
+ my $time = shift;
+
+ # Implementation of RFC 2616 section 13.2.3
+ # (age calculations)
+ my $response_time = $self->client_date;
+ my $date = $self->date;
+
+ my $age = 0;
+ if ($response_time && $date) {
+ $age = $response_time - $date; # apparent_age
+ $age = 0 if $age < 0;
+ }
+
+ my $age_v = $self->header('Age');
+ if ($age_v && $age_v > $age) {
+ $age = $age_v; # corrected_received_age
+ }
+
+ if ($response_time) {
+ my $request = $self->request;
+ if ($request) {
+ my $request_time = $request->date;
+ if ($request_time && $request_time < $response_time) {
+ # Add response_delay to age to get 'corrected_initial_age'
+ $age += $response_time - $request_time;
+ }
+ }
+ $age += ($time || time) - $response_time;
+ }
+ return $age;
+}
+
+
+sub freshness_lifetime
+{
+ my($self, %opt) = @_;
+
+ # First look for the Cache-Control: max-age=n header
+ for my $cc ($self->header('Cache-Control')) {
+ for my $cc_dir (split(/\s*,\s*/, $cc)) {
+ return $1 if $cc_dir =~ /^max-age\s*=\s*(\d+)/i;
+ }
+ }
+
+ # Next possibility is to look at the "Expires" header
+ my $date = $self->date || $self->client_date || $opt{time} || time;
+ if (my $expires = $self->expires) {
+ return $expires - $date;
+ }
+
+ # Must apply heuristic expiration
+ return undef if exists $opt{heuristic_expiry} && !$opt{heuristic_expiry};
+
+ # Default heuristic expiration parameters
+ $opt{h_min} ||= 60;
+ $opt{h_max} ||= 24 * 3600;
+ $opt{h_lastmod_fraction} ||= 0.10; # 10% since last-mod suggested by RFC2616
+ $opt{h_default} ||= 3600;
+
+ # Should give a warning if more than 24 hours according to
+ # RFC 2616 section 13.2.4. Here we just make this the default
+ # maximum value.
+
+ if (my $last_modified = $self->last_modified) {
+ my $h_exp = ($date - $last_modified) * $opt{h_lastmod_fraction};
+ return $opt{h_min} if $h_exp < $opt{h_min};
+ return $opt{h_max} if $h_exp > $opt{h_max};
+ return $h_exp;
+ }
+
+ # default when all else fails
+ return $opt{h_min} if $opt{h_min} > $opt{h_default};
+ return $opt{h_default};
+}
+
+
+sub is_fresh
+{
+ my($self, %opt) = @_;
+ $opt{time} ||= time;
+ my $f = $self->freshness_lifetime(%opt);
+ return undef unless defined($f);
+ return $f > $self->current_age($opt{time});
+}
+
+
+sub fresh_until
+{
+ my($self, %opt) = @_;
+ $opt{time} ||= time;
+ my $f = $self->freshness_lifetime(%opt);
+ return undef unless defined($f);
+ return $f - $self->current_age($opt{time}) + $opt{time};
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+HTTP::Response - HTTP style response message
+
+=head1 SYNOPSIS
+
+Response objects are returned by the request() method of the C<LWP::UserAgent>:
+
+ # ...
+ $response = $ua->request($request)
+ if ($response->is_success) {
+ print $response->decoded_content;
+ }
+ else {
+ print STDERR $response->status_line, "\n";
+ }
+
+=head1 DESCRIPTION
+
+The C<HTTP::Response> class encapsulates HTTP style responses. A
+response consists of a response line, some headers, and a content
+body. Note that the LWP library uses HTTP style responses even for
+non-HTTP protocol schemes. Instances of this class are usually
+created and returned by the request() method of an C<LWP::UserAgent>
+object.
+
+C<HTTP::Response> is a subclass of C<HTTP::Message> and therefore
+inherits its methods. The following additional methods are available:
+
+=over 4
+
+=item $r = HTTP::Response->new( $code )
+
+=item $r = HTTP::Response->new( $code, $msg )
+
+=item $r = HTTP::Response->new( $code, $msg, $header )
+
+=item $r = HTTP::Response->new( $code, $msg, $header, $content )
+
+Constructs a new C<HTTP::Response> object describing a response with
+response code $code and optional message $msg. The optional $header
+argument should be a reference to an C<HTTP::Headers> object or a
+plain array reference of key/value pairs. The optional $content
+argument should be a string of bytes. The meaning these arguments are
+described below.
+
+=item $r = HTTP::Response->parse( $str )
+
+This constructs a new response object by parsing the given string.
+
+=item $r->code
+
+=item $r->code( $code )
+
+This is used to get/set the code attribute. The code is a 3 digit
+number that encode the overall outcome of a HTTP response. The
+C<HTTP::Status> module provide constants that provide mnemonic names
+for the code attribute.
+
+=item $r->message
+
+=item $r->message( $message )
+
+This is used to get/set the message attribute. The message is a short
+human readable single line string that explains the response code.
+
+=item $r->header( $field )
+
+=item $r->header( $field => $value )
+
+This is used to get/set header values and it is inherited from
+C<HTTP::Headers> via C<HTTP::Message>. See L<HTTP::Headers> for
+details and other similar methods that can be used to access the
+headers.
+
+=item $r->content
+
+=item $r->content( $bytes )
+
+This is used to get/set the raw content and it is inherited from the
+C<HTTP::Message> base class. See L<HTTP::Message> for details and
+other methods that can be used to access the content.
+
+=item $r->decoded_content( %options )
+
+This will return the content after any C<Content-Encoding> and
+charsets have been decoded. See L<HTTP::Message> for details.
+
+=item $r->request
+
+=item $r->request( $request )
+
+This is used to get/set the request attribute. The request attribute
+is a reference to the the request that caused this response. It does
+not have to be the same request passed to the $ua->request() method,
+because there might have been redirects and authorization retries in
+between.
+
+=item $r->previous
+
+=item $r->previous( $response )
+
+This is used to get/set the previous attribute. The previous
+attribute is used to link together chains of responses. You get
+chains of responses if the first response is redirect or unauthorized.
+The value is C<undef> if this is the first response in a chain.
+
+Note that the method $r->redirects is provided as a more convenient
+way to access the response chain.
+
+=item $r->status_line
+
+Returns the string "E<lt>code> E<lt>message>". If the message attribute
+is not set then the official name of E<lt>code> (see L<HTTP::Status>)
+is substituted.
+
+=item $r->base
+
+Returns the base URI for this response. The return value will be a
+reference to a URI object.
+
+The base URI is obtained from one the following sources (in priority
+order):
+
+=over 4
+
+=item 1.
+
+Embedded in the document content, for instance <BASE HREF="...">
+in HTML documents.
+
+=item 2.
+
+A "Content-Base:" or a "Content-Location:" header in the response.
+
+For backwards compatibility with older HTTP implementations we will
+also look for the "Base:" header.
+
+=item 3.
+
+The URI used to request this response. This might not be the original
+URI that was passed to $ua->request() method, because we might have
+received some redirect responses first.
+
+=back
+
+If none of these sources provide an absolute URI, undef is returned.
+
+When the LWP protocol modules produce the HTTP::Response object, then
+any base URI embedded in the document (step 1) will already have
+initialized the "Content-Base:" header. This means that this method
+only performs the last 2 steps (the content is not always available
+either).
+
+=item $r->filename
+
+Returns a filename for this response. Note that doing sanity checks
+on the returned filename (eg. removing characters that cannot be used
+on the target filesystem where the filename would be used, and
+laundering it for security purposes) are the caller's responsibility;
+the only related thing done by this method is that it makes a simple
+attempt to return a plain filename with no preceding path segments.
+
+The filename is obtained from one the following sources (in priority
+order):
+
+=over 4
+
+=item 1.
+
+A "Content-Disposition:" header in the response. Proper decoding of
+RFC 2047 encoded filenames requires the C<MIME::QuotedPrint> (for "Q"
+encoding), C<MIME::Base64> (for "B" encoding), and C<Encode> modules.
+
+=item 2.
+
+A "Content-Location:" header in the response.
+
+=item 3.
+
+The URI used to request this response. This might not be the original
+URI that was passed to $ua->request() method, because we might have
+received some redirect responses first.
+
+=back
+
+If a filename cannot be derived from any of these sources, undef is
+returned.
+
+=item $r->as_string
+
+=item $r->as_string( $eol )
+
+Returns a textual representation of the response.
+
+=item $r->is_info
+
+=item $r->is_success
+
+=item $r->is_redirect
+
+=item $r->is_error
+
+These methods indicate if the response was informational, successful, a
+redirection, or an error. See L<HTTP::Status> for the meaning of these.
+
+=item $r->error_as_HTML
+
+Returns a string containing a complete HTML document indicating what
+error occurred. This method should only be called when $r->is_error
+is TRUE.
+
+=item $r->redirects
+
+Returns the list of redirect responses that lead up to this response
+by following the $r->previous chain. The list order is oldest first.
+
+In scalar context return the number of redirect responses leading up
+to this one.
+
+=item $r->current_age
+
+Calculates the "current age" of the response as specified by RFC 2616
+section 13.2.3. The age of a response is the time since it was sent
+by the origin server. The returned value is a number representing the
+age in seconds.
+
+=item $r->freshness_lifetime( %opt )
+
+Calculates the "freshness lifetime" of the response as specified by
+RFC 2616 section 13.2.4. The "freshness lifetime" is the length of
+time between the generation of a response and its expiration time.
+The returned value is the number of seconds until expiry.
+
+If the response does not contain an "Expires" or a "Cache-Control"
+header, then this function will apply some simple heuristic based on
+the "Last-Modified" header to determine a suitable lifetime. The
+following options might be passed to control the heuristics:
+
+=over
+
+=item heuristic_expiry => $bool
+
+If passed as a FALSE value, don't apply heuristics and just return
+C<undef> when "Expires" or "Cache-Control" is lacking.
+
+=item h_lastmod_fraction => $num
+
+This number represent the fraction of the difference since the
+"Last-Modified" timestamp to make the expiry time. The default is
+C<0.10>, the suggested typical setting of 10% in RFC 2616.
+
+=item h_min => $sec
+
+This is the lower limit of the heuristic expiry age to use. The
+default is C<60> (1 minute).
+
+=item h_max => $sec
+
+This is the upper limit of the heuristic expiry age to use. The
+default is C<86400> (24 hours).
+
+=item h_default => $sec
+
+This is the expiry age to use when nothing else applies. The default
+is C<3600> (1 hour) or "h_min" if greater.
+
+=back
+
+=item $r->is_fresh( %opt )
+
+Returns TRUE if the response is fresh, based on the values of
+freshness_lifetime() and current_age(). If the response is no longer
+fresh, then it has to be re-fetched or re-validated by the origin
+server.
+
+Options might be passed to control expiry heuristics, see the
+description of freshness_lifetime().
+
+=item $r->fresh_until( %opt )
+
+Returns the time (seconds since epoch) when this entity is no longer fresh.
+
+Options might be passed to control expiry heuristics, see the
+description of freshness_lifetime().
+
+=back
+
+=head1 SEE ALSO
+
+L<HTTP::Headers>, L<HTTP::Message>, L<HTTP::Status>, L<HTTP::Request>
+
+=head1 COPYRIGHT
+
+Copyright 1995-2004 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Status;
+
+use strict;
+require 5.002; # because we use prototypes
+
+use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
+
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(is_info is_success is_redirect is_error status_message);
+@EXPORT_OK = qw(is_client_error is_server_error);
+$VERSION = "5.817";
+
+# Note also addition of mnemonics to @EXPORT below
+
+# Unmarked codes are from RFC 2616
+# See also: http://en.wikipedia.org/wiki/List_of_HTTP_status_codes
+
+my %StatusCode = (
+ 100 => 'Continue',
+ 101 => 'Switching Protocols',
+ 102 => 'Processing', # RFC 2518 (WebDAV)
+ 200 => 'OK',
+ 201 => 'Created',
+ 202 => 'Accepted',
+ 203 => 'Non-Authoritative Information',
+ 204 => 'No Content',
+ 205 => 'Reset Content',
+ 206 => 'Partial Content',
+ 207 => 'Multi-Status', # RFC 2518 (WebDAV)
+ 300 => 'Multiple Choices',
+ 301 => 'Moved Permanently',
+ 302 => 'Found',
+ 303 => 'See Other',
+ 304 => 'Not Modified',
+ 305 => 'Use Proxy',
+ 307 => 'Temporary Redirect',
+ 400 => 'Bad Request',
+ 401 => 'Unauthorized',
+ 402 => 'Payment Required',
+ 403 => 'Forbidden',
+ 404 => 'Not Found',
+ 405 => 'Method Not Allowed',
+ 406 => 'Not Acceptable',
+ 407 => 'Proxy Authentication Required',
+ 408 => 'Request Timeout',
+ 409 => 'Conflict',
+ 410 => 'Gone',
+ 411 => 'Length Required',
+ 412 => 'Precondition Failed',
+ 413 => 'Request Entity Too Large',
+ 414 => 'Request-URI Too Large',
+ 415 => 'Unsupported Media Type',
+ 416 => 'Request Range Not Satisfiable',
+ 417 => 'Expectation Failed',
+ 422 => 'Unprocessable Entity', # RFC 2518 (WebDAV)
+ 423 => 'Locked', # RFC 2518 (WebDAV)
+ 424 => 'Failed Dependency', # RFC 2518 (WebDAV)
+ 425 => 'No code', # WebDAV Advanced Collections
+ 426 => 'Upgrade Required', # RFC 2817
+ 449 => 'Retry with', # unofficial Microsoft
+ 500 => 'Internal Server Error',
+ 501 => 'Not Implemented',
+ 502 => 'Bad Gateway',
+ 503 => 'Service Unavailable',
+ 504 => 'Gateway Timeout',
+ 505 => 'HTTP Version Not Supported',
+ 506 => 'Variant Also Negotiates', # RFC 2295
+ 507 => 'Insufficient Storage', # RFC 2518 (WebDAV)
+ 509 => 'Bandwidth Limit Exceeded', # unofficial
+ 510 => 'Not Extended', # RFC 2774
+);
+
+my $mnemonicCode = '';
+my ($code, $message);
+while (($code, $message) = each %StatusCode) {
+ # create mnemonic subroutines
+ $message =~ tr/a-z \-/A-Z__/;
+ $mnemonicCode .= "sub HTTP_$message () { $code }\n";
+ $mnemonicCode .= "*RC_$message = \\&HTTP_$message;\n"; # legacy
+ $mnemonicCode .= "push(\@EXPORT_OK, 'HTTP_$message');\n";
+ $mnemonicCode .= "push(\@EXPORT, 'RC_$message');\n";
+}
+eval $mnemonicCode; # only one eval for speed
+die if $@;
+
+# backwards compatibility
+*RC_MOVED_TEMPORARILY = \&RC_FOUND; # 302 was renamed in the standard
+push(@EXPORT, "RC_MOVED_TEMPORARILY");
+
+%EXPORT_TAGS = (
+ constants => [grep /^HTTP_/, @EXPORT_OK],
+ is => [grep /^is_/, @EXPORT, @EXPORT_OK],
+);
+
+
+sub status_message ($) { $StatusCode{$_[0]}; }
+
+sub is_info ($) { $_[0] >= 100 && $_[0] < 200; }
+sub is_success ($) { $_[0] >= 200 && $_[0] < 300; }
+sub is_redirect ($) { $_[0] >= 300 && $_[0] < 400; }
+sub is_error ($) { $_[0] >= 400 && $_[0] < 600; }
+sub is_client_error ($) { $_[0] >= 400 && $_[0] < 500; }
+sub is_server_error ($) { $_[0] >= 500 && $_[0] < 600; }
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+HTTP::Status - HTTP Status code processing
+
+=head1 SYNOPSIS
+
+ use HTTP::Status qw(:constants :is status_message);
+
+ if ($rc != HTTP_OK) {
+ print status_message($rc), "\n";
+ }
+
+ if (is_success($rc)) { ... }
+ if (is_error($rc)) { ... }
+ if (is_redirect($rc)) { ... }
+
+=head1 DESCRIPTION
+
+I<HTTP::Status> is a library of routines for defining and
+classifying HTTP status codes for libwww-perl. Status codes are
+used to encode the overall outcome of a HTTP response message. Codes
+correspond to those defined in RFC 2616 and RFC 2518.
+
+=head1 CONSTANTS
+
+The following constant functions can be used as mnemonic status code
+names. None of these are exported by default. Use the C<:constants>
+tag to import them all.
+
+ HTTP_CONTINUE (100)
+ HTTP_SWITCHING_PROTOCOLS (101)
+ HTTP_PROCESSING (102)
+
+ HTTP_OK (200)
+ HTTP_CREATED (201)
+ HTTP_ACCEPTED (202)
+ HTTP_NON_AUTHORITATIVE_INFORMATION (203)
+ HTTP_NO_CONTENT (204)
+ HTTP_RESET_CONTENT (205)
+ HTTP_PARTIAL_CONTENT (206)
+ HTTP_MULTI_STATUS (207)
+
+ HTTP_MULTIPLE_CHOICES (300)
+ HTTP_MOVED_PERMANENTLY (301)
+ HTTP_FOUND (302)
+ HTTP_SEE_OTHER (303)
+ HTTP_NOT_MODIFIED (304)
+ HTTP_USE_PROXY (305)
+ HTTP_TEMPORARY_REDIRECT (307)
+
+ HTTP_BAD_REQUEST (400)
+ HTTP_UNAUTHORIZED (401)
+ HTTP_PAYMENT_REQUIRED (402)
+ HTTP_FORBIDDEN (403)
+ HTTP_NOT_FOUND (404)
+ HTTP_METHOD_NOT_ALLOWED (405)
+ HTTP_NOT_ACCEPTABLE (406)
+ HTTP_PROXY_AUTHENTICATION_REQUIRED (407)
+ HTTP_REQUEST_TIMEOUT (408)
+ HTTP_CONFLICT (409)
+ HTTP_GONE (410)
+ HTTP_LENGTH_REQUIRED (411)
+ HTTP_PRECONDITION_FAILED (412)
+ HTTP_REQUEST_ENTITY_TOO_LARGE (413)
+ HTTP_REQUEST_URI_TOO_LARGE (414)
+ HTTP_UNSUPPORTED_MEDIA_TYPE (415)
+ HTTP_REQUEST_RANGE_NOT_SATISFIABLE (416)
+ HTTP_EXPECTATION_FAILED (417)
+ HTTP_UNPROCESSABLE_ENTITY (422)
+ HTTP_LOCKED (423)
+ HTTP_FAILED_DEPENDENCY (424)
+ HTTP_NO_CODE (425)
+ HTTP_UPGRADE_REQUIRED (426)
+ HTTP_RETRY_WITH (449)
+
+ HTTP_INTERNAL_SERVER_ERROR (500)
+ HTTP_NOT_IMPLEMENTED (501)
+ HTTP_BAD_GATEWAY (502)
+ HTTP_SERVICE_UNAVAILABLE (503)
+ HTTP_GATEWAY_TIMEOUT (504)
+ HTTP_HTTP_VERSION_NOT_SUPPORTED (505)
+ HTTP_VARIANT_ALSO_NEGOTIATES (506)
+ HTTP_INSUFFICIENT_STORAGE (507)
+ HTTP_BANDWIDTH_LIMIT_EXCEEDED (509)
+ HTTP_NOT_EXTENDED (510)
+
+=head1 FUNCTIONS
+
+The following additional functions are provided. Most of them are
+exported by default. The C<:is> import tag can be used to import all
+the classification functions.
+
+=over 4
+
+=item status_message( $code )
+
+The status_message() function will translate status codes to human
+readable strings. The string is the same as found in the constant
+names above. If the $code is unknown, then C<undef> is returned.
+
+=item is_info( $code )
+
+Return TRUE if C<$code> is an I<Informational> status code (1xx). This
+class of status code indicates a provisional response which can't have
+any content.
+
+=item is_success( $code )
+
+Return TRUE if C<$code> is a I<Successful> status code (2xx).
+
+=item is_redirect( $code )
+
+Return TRUE if C<$code> is a I<Redirection> status code (3xx). This class of
+status code indicates that further action needs to be taken by the
+user agent in order to fulfill the request.
+
+=item is_error( $code )
+
+Return TRUE if C<$code> is an I<Error> status code (4xx or 5xx). The function
+return TRUE for both client error or a server error status codes.
+
+=item is_client_error( $code )
+
+Return TRUE if C<$code> is an I<Client Error> status code (4xx). This class
+of status code is intended for cases in which the client seems to have
+erred.
+
+This function is B<not> exported by default.
+
+=item is_server_error( $code )
+
+Return TRUE if C<$code> is an I<Server Error> status code (5xx). This class
+of status codes is intended for cases in which the server is aware
+that it has erred or is incapable of performing the request.
+
+This function is B<not> exported by default.
+
+=back
+
+=head1 BUGS
+
+For legacy reasons all the C<HTTP_> constants are exported by default
+with the prefix C<RC_>. It's recommended to use explict imports and
+the C<:constants> tag instead of relying on this.
--- /dev/null
+package LWP;
+
+$VERSION = "5.836";
+sub Version { $VERSION; }
+
+require 5.005;
+require LWP::UserAgent; # this should load everything you need
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWP - The World-Wide Web library for Perl
+
+=head1 SYNOPSIS
+
+ use LWP;
+ print "This is libwww-perl-$LWP::VERSION\n";
+
+
+=head1 DESCRIPTION
+
+The libwww-perl collection is a set of Perl modules which provides a
+simple and consistent application programming interface (API) to the
+World-Wide Web. The main focus of the library is to provide classes
+and functions that allow you to write WWW clients. The library also
+contain modules that are of more general use and even classes that
+help you implement simple HTTP servers.
+
+Most modules in this library provide an object oriented API. The user
+agent, requests sent and responses received from the WWW server are
+all represented by objects. This makes a simple and powerful
+interface to these services. The interface is easy to extend
+and customize for your own needs.
+
+The main features of the library are:
+
+=over 3
+
+=item *
+
+Contains various reusable components (modules) that can be
+used separately or together.
+
+=item *
+
+Provides an object oriented model of HTTP-style communication. Within
+this framework we currently support access to http, https, gopher, ftp, news,
+file, and mailto resources.
+
+=item *
+
+Provides a full object oriented interface or
+a very simple procedural interface.
+
+=item *
+
+Supports the basic and digest authorization schemes.
+
+=item *
+
+Supports transparent redirect handling.
+
+=item *
+
+Supports access through proxy servers.
+
+=item *
+
+Provides parser for F<robots.txt> files and a framework for constructing robots.
+
+=item *
+
+Supports parsing of HTML forms.
+
+=item *
+
+Implements HTTP content negotiation algorithm that can
+be used both in protocol modules and in server scripts (like CGI
+scripts).
+
+=item *
+
+Supports HTTP cookies.
+
+=item *
+
+Some simple command line clients, for instance C<lwp-request> and C<lwp-download>.
+
+=back
+
+
+=head1 HTTP STYLE COMMUNICATION
+
+
+The libwww-perl library is based on HTTP style communication. This
+section tries to describe what that means.
+
+Let us start with this quote from the HTTP specification document
+<URL:http://www.w3.org/pub/WWW/Protocols/>:
+
+=over 3
+
+=item
+
+The HTTP protocol is based on a request/response paradigm. A client
+establishes a connection with a server and sends a request to the
+server in the form of a request method, URI, and protocol version,
+followed by a MIME-like message containing request modifiers, client
+information, and possible body content. The server responds with a
+status line, including the message's protocol version and a success or
+error code, followed by a MIME-like message containing server
+information, entity meta-information, and possible body content.
+
+=back
+
+What this means to libwww-perl is that communication always take place
+through these steps: First a I<request> object is created and
+configured. This object is then passed to a server and we get a
+I<response> object in return that we can examine. A request is always
+independent of any previous requests, i.e. the service is stateless.
+The same simple model is used for any kind of service we want to
+access.
+
+For example, if we want to fetch a document from a remote file server,
+then we send it a request that contains a name for that document and
+the response will contain the document itself. If we access a search
+engine, then the content of the request will contain the query
+parameters and the response will contain the query result. If we want
+to send a mail message to somebody then we send a request object which
+contains our message to the mail server and the response object will
+contain an acknowledgment that tells us that the message has been
+accepted and will be forwarded to the recipient(s).
+
+It is as simple as that!
+
+
+=head2 The Request Object
+
+The libwww-perl request object has the class name C<HTTP::Request>.
+The fact that the class name uses C<HTTP::> as a
+prefix only implies that we use the HTTP model of communication. It
+does not limit the kind of services we can try to pass this I<request>
+to. For instance, we will send C<HTTP::Request>s both to ftp and
+gopher servers, as well as to the local file system.
+
+The main attributes of the request objects are:
+
+=over 3
+
+=item *
+
+The B<method> is a short string that tells what kind of
+request this is. The most common methods are B<GET>, B<PUT>,
+B<POST> and B<HEAD>.
+
+=item *
+
+The B<uri> is a string denoting the protocol, server and
+the name of the "document" we want to access. The B<uri> might
+also encode various other parameters.
+
+=item *
+
+The B<headers> contain additional information about the
+request and can also used to describe the content. The headers
+are a set of keyword/value pairs.
+
+=item *
+
+The B<content> is an arbitrary amount of data.
+
+=back
+
+=head2 The Response Object
+
+The libwww-perl response object has the class name C<HTTP::Response>.
+The main attributes of objects of this class are:
+
+=over 3
+
+=item *
+
+The B<code> is a numerical value that indicates the overall
+outcome of the request.
+
+=item *
+
+The B<message> is a short, human readable string that
+corresponds to the I<code>.
+
+=item *
+
+The B<headers> contain additional information about the
+response and describe the content.
+
+=item *
+
+The B<content> is an arbitrary amount of data.
+
+=back
+
+Since we don't want to handle all possible I<code> values directly in
+our programs, a libwww-perl response object has methods that can be
+used to query what kind of response this is. The most commonly used
+response classification methods are:
+
+=over 3
+
+=item is_success()
+
+The request was was successfully received, understood or accepted.
+
+=item is_error()
+
+The request failed. The server or the resource might not be
+available, access to the resource might be denied or other things might
+have failed for some reason.
+
+=back
+
+=head2 The User Agent
+
+Let us assume that we have created a I<request> object. What do we
+actually do with it in order to receive a I<response>?
+
+The answer is that you pass it to a I<user agent> object and this
+object takes care of all the things that need to be done
+(like low-level communication and error handling) and returns
+a I<response> object. The user agent represents your
+application on the network and provides you with an interface that
+can accept I<requests> and return I<responses>.
+
+The user agent is an interface layer between
+your application code and the network. Through this interface you are
+able to access the various servers on the network.
+
+The class name for the user agent is C<LWP::UserAgent>. Every
+libwww-perl application that wants to communicate should create at
+least one object of this class. The main method provided by this
+object is request(). This method takes an C<HTTP::Request> object as
+argument and (eventually) returns a C<HTTP::Response> object.
+
+The user agent has many other attributes that let you
+configure how it will interact with the network and with your
+application.
+
+=over 3
+
+=item *
+
+The B<timeout> specifies how much time we give remote servers to
+respond before the library disconnects and creates an
+internal I<timeout> response.
+
+=item *
+
+The B<agent> specifies the name that your application should use when it
+presents itself on the network.
+
+=item *
+
+The B<from> attribute can be set to the e-mail address of the person
+responsible for running the application. If this is set, then the
+address will be sent to the servers with every request.
+
+=item *
+
+The B<parse_head> specifies whether we should initialize response
+headers from the E<lt>head> section of HTML documents.
+
+=item *
+
+The B<proxy> and B<no_proxy> attributes specify if and when to go through
+a proxy server. <URL:http://www.w3.org/pub/WWW/Proxies/>
+
+=item *
+
+The B<credentials> provide a way to set up user names and
+passwords needed to access certain services.
+
+=back
+
+Many applications want even more control over how they interact
+with the network and they get this by sub-classing
+C<LWP::UserAgent>. The library includes a
+sub-class, C<LWP::RobotUA>, for robot applications.
+
+=head2 An Example
+
+This example shows how the user agent, a request and a response are
+represented in actual perl code:
+
+ # Create a user agent object
+ use LWP::UserAgent;
+ my $ua = LWP::UserAgent->new;
+ $ua->agent("MyApp/0.1 ");
+
+ # Create a request
+ my $req = HTTP::Request->new(POST => 'http://search.cpan.org/search');
+ $req->content_type('application/x-www-form-urlencoded');
+ $req->content('query=libwww-perl&mode=dist');
+
+ # Pass request to the user agent and get a response back
+ my $res = $ua->request($req);
+
+ # Check the outcome of the response
+ if ($res->is_success) {
+ print $res->content;
+ }
+ else {
+ print $res->status_line, "\n";
+ }
+
+The $ua is created once when the application starts up. New request
+objects should normally created for each request sent.
+
+
+=head1 NETWORK SUPPORT
+
+This section discusses the various protocol schemes and
+the HTTP style methods that headers may be used for each.
+
+For all requests, a "User-Agent" header is added and initialized from
+the $ua->agent attribute before the request is handed to the network
+layer. In the same way, a "From" header is initialized from the
+$ua->from attribute.
+
+For all responses, the library adds a header called "Client-Date".
+This header holds the time when the response was received by
+your application. The format and semantics of the header are the
+same as the server created "Date" header. You may also encounter other
+"Client-XXX" headers. They are all generated by the library
+internally and are not received from the servers.
+
+=head2 HTTP Requests
+
+HTTP requests are just handed off to an HTTP server and it
+decides what happens. Few servers implement methods beside the usual
+"GET", "HEAD", "POST" and "PUT", but CGI-scripts may implement
+any method they like.
+
+If the server is not available then the library will generate an
+internal error response.
+
+The library automatically adds a "Host" and a "Content-Length" header
+to the HTTP request before it is sent over the network.
+
+For a GET request you might want to add a "If-Modified-Since" or
+"If-None-Match" header to make the request conditional.
+
+For a POST request you should add the "Content-Type" header. When you
+try to emulate HTML E<lt>FORM> handling you should usually let the value
+of the "Content-Type" header be "application/x-www-form-urlencoded".
+See L<lwpcook> for examples of this.
+
+The libwww-perl HTTP implementation currently support the HTTP/1.1
+and HTTP/1.0 protocol.
+
+The library allows you to access proxy server through HTTP. This
+means that you can set up the library to forward all types of request
+through the HTTP protocol module. See L<LWP::UserAgent> for
+documentation of this.
+
+
+=head2 HTTPS Requests
+
+HTTPS requests are HTTP requests over an encrypted network connection
+using the SSL protocol developed by Netscape. Everything about HTTP
+requests above also apply to HTTPS requests. In addition the library
+will add the headers "Client-SSL-Cipher", "Client-SSL-Cert-Subject" and
+"Client-SSL-Cert-Issuer" to the response. These headers denote the
+encryption method used and the name of the server owner.
+
+The request can contain the header "If-SSL-Cert-Subject" in order to
+make the request conditional on the content of the server certificate.
+If the certificate subject does not match, no request is sent to the
+server and an internally generated error response is returned. The
+value of the "If-SSL-Cert-Subject" header is interpreted as a Perl
+regular expression.
+
+
+=head2 FTP Requests
+
+The library currently supports GET, HEAD and PUT requests. GET
+retrieves a file or a directory listing from an FTP server. PUT
+stores a file on a ftp server.
+
+You can specify a ftp account for servers that want this in addition
+to user name and password. This is specified by including an "Account"
+header in the request.
+
+User name/password can be specified using basic authorization or be
+encoded in the URL. Failed logins return an UNAUTHORIZED response with
+"WWW-Authenticate: Basic" and can be treated like basic authorization
+for HTTP.
+
+The library supports ftp ASCII transfer mode by specifying the "type=a"
+parameter in the URL. It also supports transfer of ranges for FTP transfers
+using the "Range" header.
+
+Directory listings are by default returned unprocessed (as returned
+from the ftp server) with the content media type reported to be
+"text/ftp-dir-listing". The C<File::Listing> module provides methods
+for parsing of these directory listing.
+
+The ftp module is also able to convert directory listings to HTML and
+this can be requested via the standard HTTP content negotiation
+mechanisms (add an "Accept: text/html" header in the request if you
+want this).
+
+For normal file retrievals, the "Content-Type" is guessed based on the
+file name suffix. See L<LWP::MediaTypes>.
+
+The "If-Modified-Since" request header works for servers that implement
+the MDTM command. It will probably not work for directory listings though.
+
+Example:
+
+ $req = HTTP::Request->new(GET => 'ftp://me:passwd@ftp.some.where.com/');
+ $req->header(Accept => "text/html, */*;q=0.1");
+
+=head2 News Requests
+
+Access to the USENET News system is implemented through the NNTP
+protocol. The name of the news server is obtained from the
+NNTP_SERVER environment variable and defaults to "news". It is not
+possible to specify the hostname of the NNTP server in news: URLs.
+
+The library supports GET and HEAD to retrieve news articles through the
+NNTP protocol. You can also post articles to newsgroups by using
+(surprise!) the POST method.
+
+GET on newsgroups is not implemented yet.
+
+Examples:
+
+ $req = HTTP::Request->new(GET => 'news:abc1234@a.sn.no');
+
+ $req = HTTP::Request->new(POST => 'news:comp.lang.perl.test');
+ $req->header(Subject => 'This is a test',
+ From => 'me@some.where.org');
+ $req->content(<<EOT);
+ This is the content of the message that we are sending to
+ the world.
+ EOT
+
+
+=head2 Gopher Request
+
+The library supports the GET and HEAD methods for gopher requests. All
+request header values are ignored. HEAD cheats and returns a
+response without even talking to server.
+
+Gopher menus are always converted to HTML.
+
+The response "Content-Type" is generated from the document type
+encoded (as the first letter) in the request URL path itself.
+
+Example:
+
+ $req = HTTP::Request->new(GET => 'gopher://gopher.sn.no/');
+
+
+
+=head2 File Request
+
+The library supports GET and HEAD methods for file requests. The
+"If-Modified-Since" header is supported. All other headers are
+ignored. The I<host> component of the file URL must be empty or set
+to "localhost". Any other I<host> value will be treated as an error.
+
+Directories are always converted to an HTML document. For normal
+files, the "Content-Type" and "Content-Encoding" in the response are
+guessed based on the file suffix.
+
+Example:
+
+ $req = HTTP::Request->new(GET => 'file:/etc/passwd');
+
+
+=head2 Mailto Request
+
+You can send (aka "POST") mail messages using the library. All
+headers specified for the request are passed on to the mail system.
+The "To" header is initialized from the mail address in the URL.
+
+Example:
+
+ $req = HTTP::Request->new(POST => 'mailto:libwww@perl.org');
+ $req->header(Subject => "subscribe");
+ $req->content("Please subscribe me to the libwww-perl mailing list!\n");
+
+=head2 CPAN Requests
+
+URLs with scheme C<cpan:> are redirected to the a suitable CPAN
+mirror. If you have your own local mirror of CPAN you might tell LWP
+to use it for C<cpan:> URLs by an assignment like this:
+
+ $LWP::Protocol::cpan::CPAN = "file:/local/CPAN/";
+
+Suitable CPAN mirrors are also picked up from the configuration for
+the CPAN.pm, so if you have used that module a suitable mirror should
+be picked automatically. If neither of these apply, then a redirect
+to the generic CPAN http location is issued.
+
+Example request to download the newest perl:
+
+ $req = HTTP::Request->new(GET => "cpan:src/latest.tar.gz");
+
+
+=head1 OVERVIEW OF CLASSES AND PACKAGES
+
+This table should give you a quick overview of the classes provided by the
+library. Indentation shows class inheritance.
+
+ LWP::MemberMixin -- Access to member variables of Perl5 classes
+ LWP::UserAgent -- WWW user agent class
+ LWP::RobotUA -- When developing a robot applications
+ LWP::Protocol -- Interface to various protocol schemes
+ LWP::Protocol::http -- http:// access
+ LWP::Protocol::file -- file:// access
+ LWP::Protocol::ftp -- ftp:// access
+ ...
+
+ LWP::Authen::Basic -- Handle 401 and 407 responses
+ LWP::Authen::Digest
+
+ HTTP::Headers -- MIME/RFC822 style header (used by HTTP::Message)
+ HTTP::Message -- HTTP style message
+ HTTP::Request -- HTTP request
+ HTTP::Response -- HTTP response
+ HTTP::Daemon -- A HTTP server class
+
+ WWW::RobotRules -- Parse robots.txt files
+ WWW::RobotRules::AnyDBM_File -- Persistent RobotRules
+
+ Net::HTTP -- Low level HTTP client
+
+The following modules provide various functions and definitions.
+
+ LWP -- This file. Library version number and documentation.
+ LWP::MediaTypes -- MIME types configuration (text/html etc.)
+ LWP::Simple -- Simplified procedural interface for common functions
+ HTTP::Status -- HTTP status code (200 OK etc)
+ HTTP::Date -- Date parsing module for HTTP date formats
+ HTTP::Negotiate -- HTTP content negotiation calculation
+ File::Listing -- Parse directory listings
+ HTML::Form -- Processing for <form>s in HTML documents
+
+
+=head1 MORE DOCUMENTATION
+
+All modules contain detailed information on the interfaces they
+provide. The L<lwpcook> manpage is the libwww-perl cookbook that contain
+examples of typical usage of the library. You might want to take a
+look at how the scripts L<lwp-request>, L<lwp-rget> and L<lwp-mirror>
+are implemented.
+
+=head1 ENVIRONMENT
+
+The following environment variables are used by LWP:
+
+=over
+
+=item HOME
+
+The C<LWP::MediaTypes> functions will look for the F<.media.types> and
+F<.mime.types> files relative to you home directory.
+
+=item http_proxy
+
+=item ftp_proxy
+
+=item xxx_proxy
+
+=item no_proxy
+
+These environment variables can be set to enable communication through
+a proxy server. See the description of the C<env_proxy> method in
+L<LWP::UserAgent>.
+
+=item PERL_LWP_USE_HTTP_10
+
+Enable the old HTTP/1.0 protocol driver instead of the new HTTP/1.1
+driver. You might want to set this to a TRUE value if you discover
+that your old LWP applications fails after you installed LWP-5.60 or
+better.
+
+=item PERL_HTTP_URI_CLASS
+
+Used to decide what URI objects to instantiate. The default is C<URI>.
+You might want to set it to C<URI::URL> for compatibility with old times.
+
+=back
+
+=head1 AUTHORS
+
+LWP was made possible by contributions from Adam Newby, Albert
+Dvornik, Alexandre Duret-Lutz, Andreas Gustafsson, Andreas König,
+Andrew Pimlott, Andy Lester, Ben Coleman, Benjamin Low, Ben Low, Ben
+Tilly, Blair Zajac, Bob Dalgleish, BooK, Brad Hughes, Brian
+J. Murrell, Brian McCauley, Charles C. Fu, Charles Lane, Chris Nandor,
+Christian Gilmore, Chris W. Unger, Craig Macdonald, Dale Couch, Dan
+Kubb, Dave Dunkin, Dave W. Smith, David Coppit, David Dick, David
+D. Kilzer, Doug MacEachern, Edward Avis, erik, Gary Shea, Gisle Aas,
+Graham Barr, Gurusamy Sarathy, Hans de Graaff, Harald Joerg, Harry
+Bochner, Hugo, Ilya Zakharevich, INOUE Yoshinari, Ivan Panchenko, Jack
+Shirazi, James Tillman, Jan Dubois, Jared Rhine, Jim Stern, Joao
+Lopes, John Klar, Johnny Lee, Josh Kronengold, Josh Rai, Joshua
+Chamas, Joshua Hoblitt, Kartik Subbarao, Keiichiro Nagano, Ken
+Williams, KONISHI Katsuhiro, Lee T Lindley, Liam Quinn, Marc Hedlund,
+Marc Langheinrich, Mark D. Anderson, Marko Asplund, Mark Stosberg,
+Markus B Krüger, Markus Laker, Martijn Koster, Martin Thurn, Matthew
+Eldridge, Matthew.van.Eerde, Matt Sergeant, Michael A. Chase, Michael
+Quaranta, Michael Thompson, Mike Schilli, Moshe Kaminsky, Nathan
+Torkington, Nicolai Langfeldt, Norton Allen, Olly Betts, Paul
+J. Schinder, peterm, Philip GuentherDaniel Buenzli, Pon Hwa Lin,
+Radoslaw Zielinski, Radu Greab, Randal L. Schwartz, Richard Chen,
+Robin Barker, Roy Fielding, Sander van Zoest, Sean M. Burke,
+shildreth, Slaven Rezic, Steve A Fink, Steve Hay, Steven Butler,
+Steve_Kilbane, Takanori Ugai, Thomas Lotterer, Tim Bunce, Tom Hughes,
+Tony Finch, Ville Skyttä, Ward Vandewege, William York, Yale Huang,
+and Yitzchak Scott-Thoennes.
+
+LWP owes a lot in motivation, design, and code, to the libwww-perl
+library for Perl4 by Roy Fielding, which included work from Alberto
+Accomazzi, James Casey, Brooks Cutter, Martijn Koster, Oscar
+Nierstrasz, Mel Melchner, Gertjan van Oosten, Jared Rhine, Jack
+Shirazi, Gene Spafford, Marc VanHeyningen, Steven E. Brenner, Marion
+Hakanson, Waldemar Kebsch, Tony Sanders, and Larry Wall; see the
+libwww-perl-0.40 library for details.
+
+=head1 COPYRIGHT
+
+ Copyright 1995-2009, Gisle Aas
+ Copyright 1995, Martijn Koster
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=head1 AVAILABILITY
+
+The latest version of this library is likely to be available from CPAN
+as well as:
+
+ http://github.com/gisle/libwww-perl
+
+The best place to discuss this code is on the <libwww@perl.org>
+mailing list.
+
+=cut
--- /dev/null
+package LWP::Authen::Basic;
+use strict;
+
+require MIME::Base64;
+
+sub auth_header {
+ my($class, $user, $pass) = @_;
+ return "Basic " . MIME::Base64::encode("$user:$pass", "");
+}
+
+sub authenticate
+{
+ my($class, $ua, $proxy, $auth_param, $response,
+ $request, $arg, $size) = @_;
+
+ my $realm = $auth_param->{realm} || "";
+ my $url = $proxy ? $request->{proxy} : $request->uri_canonical;
+ return $response unless $url;
+ my $host_port = $url->host_port;
+ my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization";
+
+ my @m = $proxy ? (m_proxy => $url) : (m_host_port => $host_port);
+ push(@m, realm => $realm);
+
+ my $h = $ua->get_my_handler("request_prepare", @m, sub {
+ $_[0]{callback} = sub {
+ my($req, $ua, $h) = @_;
+ my($user, $pass) = $ua->credentials($host_port, $h->{realm});
+ if (defined $user) {
+ my $auth_value = $class->auth_header($user, $pass, $req, $ua, $h);
+ $req->header($auth_header => $auth_value);
+ }
+ };
+ });
+ $h->{auth_param} = $auth_param;
+
+ if (!$proxy && !$request->header($auth_header) && $ua->credentials($host_port, $realm)) {
+ # we can make sure this handler applies and retry
+ add_path($h, $url->path);
+ return $ua->request($request->clone, $arg, $size, $response);
+ }
+
+ my($user, $pass) = $ua->get_basic_credentials($realm, $url, $proxy);
+ unless (defined $user and defined $pass) {
+ $ua->set_my_handler("request_prepare", undef, @m); # delete handler
+ return $response;
+ }
+
+ # check that the password has changed
+ my ($olduser, $oldpass) = $ua->credentials($host_port, $realm);
+ return $response if (defined $olduser and defined $oldpass and
+ $user eq $olduser and $pass eq $oldpass);
+
+ $ua->credentials($host_port, $realm, $user, $pass);
+ add_path($h, $url->path) unless $proxy;
+ return $ua->request($request->clone, $arg, $size, $response);
+}
+
+sub add_path {
+ my($h, $path) = @_;
+ $path =~ s,[^/]+\z,,;
+ push(@{$h->{m_path_prefix}}, $path);
+}
+
+1;
--- /dev/null
+package LWP::Authen::Digest;
+
+use strict;
+use base 'LWP::Authen::Basic';
+
+require Digest::MD5;
+
+sub auth_header {
+ my($class, $user, $pass, $request, $ua, $h) = @_;
+
+ my $auth_param = $h->{auth_param};
+
+ my $nc = sprintf "%08X", ++$ua->{authen_md5_nonce_count}{$auth_param->{nonce}};
+ my $cnonce = sprintf "%8x", time;
+
+ my $uri = $request->uri->path_query;
+ $uri = "/" unless length $uri;
+
+ my $md5 = Digest::MD5->new;
+
+ my(@digest);
+ $md5->add(join(":", $user, $auth_param->{realm}, $pass));
+ push(@digest, $md5->hexdigest);
+ $md5->reset;
+
+ push(@digest, $auth_param->{nonce});
+
+ if ($auth_param->{qop}) {
+ push(@digest, $nc, $cnonce, ($auth_param->{qop} =~ m|^auth[,;]auth-int$|) ? 'auth' : $auth_param->{qop});
+ }
+
+ $md5->add(join(":", $request->method, $uri));
+ push(@digest, $md5->hexdigest);
+ $md5->reset;
+
+ $md5->add(join(":", @digest));
+ my($digest) = $md5->hexdigest;
+ $md5->reset;
+
+ my %resp = map { $_ => $auth_param->{$_} } qw(realm nonce opaque);
+ @resp{qw(username uri response algorithm)} = ($user, $uri, $digest, "MD5");
+
+ if (($auth_param->{qop} || "") =~ m|^auth([,;]auth-int)?$|) {
+ @resp{qw(qop cnonce nc)} = ("auth", $cnonce, $nc);
+ }
+
+ my(@order) = qw(username realm qop algorithm uri nonce nc cnonce response);
+ if($request->method =~ /^(?:POST|PUT)$/) {
+ $md5->add($request->content);
+ my $content = $md5->hexdigest;
+ $md5->reset;
+ $md5->add(join(":", @digest[0..1], $content));
+ $md5->reset;
+ $resp{"message-digest"} = $md5->hexdigest;
+ push(@order, "message-digest");
+ }
+ push(@order, "opaque");
+ my @pairs;
+ for (@order) {
+ next unless defined $resp{$_};
+ push(@pairs, "$_=" . qq("$resp{$_}"));
+ }
+
+ my $auth_value = "Digest " . join(", ", @pairs);
+ return $auth_value;
+}
+
+1;
--- /dev/null
+package LWP::Authen::Ntlm;
+
+use strict;
+use vars qw/$VERSION/;
+
+$VERSION = '5.835';
+
+use Authen::NTLM "1.02";
+use MIME::Base64 "2.12";
+
+sub authenticate {
+ my($class, $ua, $proxy, $auth_param, $response,
+ $request, $arg, $size) = @_;
+
+ my($user, $pass) = $ua->get_basic_credentials($auth_param->{realm},
+ $request->uri, $proxy);
+
+ unless(defined $user and defined $pass) {
+ return $response;
+ }
+
+ if (!$ua->conn_cache()) {
+ warn "The keep_alive option must be enabled for NTLM authentication to work. NTLM authentication aborted.\n";
+ return $response;
+ }
+
+ my($domain, $username) = split(/\\/, $user);
+
+ ntlm_domain($domain);
+ ntlm_user($username);
+ ntlm_password($pass);
+
+ my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization";
+
+ # my ($challenge) = $response->header('WWW-Authenticate');
+ my $challenge;
+ foreach ($response->header('WWW-Authenticate')) {
+ last if /^NTLM/ && ($challenge=$_);
+ }
+
+ if ($challenge eq 'NTLM') {
+ # First phase, send handshake
+ my $auth_value = "NTLM " . ntlm();
+ ntlm_reset();
+
+ # Need to check this isn't a repeated fail!
+ my $r = $response;
+ my $retry_count = 0;
+ while ($r) {
+ my $auth = $r->request->header($auth_header);
+ ++$retry_count if ($auth && $auth eq $auth_value);
+ if ($retry_count > 2) {
+ # here we know this failed before
+ $response->header("Client-Warning" =>
+ "Credentials for '$user' failed before");
+ return $response;
+ }
+ $r = $r->previous;
+ }
+
+ my $referral = $request->clone;
+ $referral->header($auth_header => $auth_value);
+ return $ua->request($referral, $arg, $size, $response);
+ }
+
+ else {
+ # Second phase, use the response challenge (unless non-401 code
+ # was returned, in which case, we just send back the response
+ # object, as is
+ my $auth_value;
+ if ($response->code ne '401') {
+ return $response;
+ }
+ else {
+ my $challenge;
+ foreach ($response->header('WWW-Authenticate')) {
+ last if /^NTLM/ && ($challenge=$_);
+ }
+ $challenge =~ s/^NTLM //;
+ ntlm();
+ $auth_value = "NTLM " . ntlm($challenge);
+ ntlm_reset();
+ }
+
+ my $referral = $request->clone;
+ $referral->header($auth_header => $auth_value);
+ my $response2 = $ua->request($referral, $arg, $size, $response);
+ return $response2;
+ }
+}
+
+1;
+
+
+=head1 NAME
+
+LWP::Authen::Ntlm - Library for enabling NTLM authentication (Microsoft) in LWP
+
+=head1 SYNOPSIS
+
+ use LWP::UserAgent;
+ use HTTP::Request::Common;
+ my $url = 'http://www.company.com/protected_page.html';
+
+ # Set up the ntlm client and then the base64 encoded ntlm handshake message
+ my $ua = LWP::UserAgent->new(keep_alive=>1);
+ $ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword');
+
+ $request = GET $url;
+ print "--Performing request now...-----------\n";
+ $response = $ua->request($request);
+ print "--Done with request-------------------\n";
+
+ if ($response->is_success) {print "It worked!->" . $response->code . "\n"}
+ else {print "It didn't work!->" . $response->code . "\n"}
+
+=head1 DESCRIPTION
+
+C<LWP::Authen::Ntlm> allows LWP to authenticate against servers that are using the
+NTLM authentication scheme popularized by Microsoft. This type of authentication is
+common on intranets of Microsoft-centric organizations.
+
+The module takes advantage of the Authen::NTLM module by Mark Bush. Since there
+is also another Authen::NTLM module available from CPAN by Yee Man Chan with an
+entirely different interface, it is necessary to ensure that you have the correct
+NTLM module.
+
+In addition, there have been problems with incompatibilities between different
+versions of Mime::Base64, which Bush's Authen::NTLM makes use of. Therefore, it is
+necessary to ensure that your Mime::Base64 module supports exporting of the
+encode_base64 and decode_base64 functions.
+
+=head1 USAGE
+
+The module is used indirectly through LWP, rather than including it directly in your
+code. The LWP system will invoke the NTLM authentication when it encounters the
+authentication scheme while attempting to retrieve a URL from a server. In order
+for the NTLM authentication to work, you must have a few things set up in your
+code prior to attempting to retrieve the URL:
+
+=over 4
+
+=item *
+
+Enable persistent HTTP connections
+
+To do this, pass the "keep_alive=>1" option to the LWP::UserAgent when creating it, like this:
+
+ my $ua = LWP::UserAgent->new(keep_alive=>1);
+
+=item *
+
+Set the credentials on the UserAgent object
+
+The credentials must be set like this:
+
+ $ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword');
+
+Note that you cannot use the HTTP::Request object's authorization_basic() method to set
+the credentials. Note, too, that the 'www.company.com:80' portion only sets credentials
+on the specified port AND it is case-sensitive (this is due to the way LWP is coded, and
+has nothing to do with LWP::Authen::Ntlm)
+
+=back
+
+=head1 AVAILABILITY
+
+General queries regarding LWP should be made to the LWP Mailing List.
+
+Questions specific to LWP::Authen::Ntlm can be forwarded to jtillman@bigfoot.com
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 James Tillman. All rights reserved. This
+program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<LWP>, L<LWP::UserAgent>, L<lwpcook>.
--- /dev/null
+package LWP::ConnCache;
+
+use strict;
+use vars qw($VERSION $DEBUG);
+
+$VERSION = "5.810";
+
+
+sub new {
+ my($class, %cnf) = @_;
+ my $total_capacity = delete $cnf{total_capacity};
+ $total_capacity = 1 unless defined $total_capacity;
+ if (%cnf && $^W) {
+ require Carp;
+ Carp::carp("Unrecognised options: @{[sort keys %cnf]}")
+ }
+ my $self = bless { cc_conns => [] }, $class;
+ $self->total_capacity($total_capacity);
+ $self;
+}
+
+
+sub deposit {
+ my($self, $type, $key, $conn) = @_;
+ push(@{$self->{cc_conns}}, [$conn, $type, $key, time]);
+ $self->enforce_limits($type);
+ return;
+}
+
+
+sub withdraw {
+ my($self, $type, $key) = @_;
+ my $conns = $self->{cc_conns};
+ for my $i (0 .. @$conns - 1) {
+ my $c = $conns->[$i];
+ next unless $c->[1] eq $type && $c->[2] eq $key;
+ splice(@$conns, $i, 1); # remove it
+ return $c->[0];
+ }
+ return undef;
+}
+
+
+sub total_capacity {
+ my $self = shift;
+ my $old = $self->{cc_limit_total};
+ if (@_) {
+ $self->{cc_limit_total} = shift;
+ $self->enforce_limits;
+ }
+ $old;
+}
+
+
+sub capacity {
+ my $self = shift;
+ my $type = shift;
+ my $old = $self->{cc_limit}{$type};
+ if (@_) {
+ $self->{cc_limit}{$type} = shift;
+ $self->enforce_limits($type);
+ }
+ $old;
+}
+
+
+sub enforce_limits {
+ my($self, $type) = @_;
+ my $conns = $self->{cc_conns};
+
+ my @types = $type ? ($type) : ($self->get_types);
+ for $type (@types) {
+ next unless $self->{cc_limit};
+ my $limit = $self->{cc_limit}{$type};
+ next unless defined $limit;
+ for my $i (reverse 0 .. @$conns - 1) {
+ next unless $conns->[$i][1] eq $type;
+ if (--$limit < 0) {
+ $self->dropping(splice(@$conns, $i, 1), "$type capacity exceeded");
+ }
+ }
+ }
+
+ if (defined(my $total = $self->{cc_limit_total})) {
+ while (@$conns > $total) {
+ $self->dropping(shift(@$conns), "Total capacity exceeded");
+ }
+ }
+}
+
+
+sub dropping {
+ my($self, $c, $reason) = @_;
+ print "DROPPING @$c [$reason]\n" if $DEBUG;
+}
+
+
+sub drop {
+ my($self, $checker, $reason) = @_;
+ if (ref($checker) ne "CODE") {
+ # make it so
+ if (!defined $checker) {
+ $checker = sub { 1 }; # drop all of them
+ }
+ elsif (_looks_like_number($checker)) {
+ my $age_limit = $checker;
+ my $time_limit = time - $age_limit;
+ $reason ||= "older than $age_limit";
+ $checker = sub { $_[3] < $time_limit };
+ }
+ else {
+ my $type = $checker;
+ $reason ||= "drop $type";
+ $checker = sub { $_[1] eq $type }; # match on type
+ }
+ }
+ $reason ||= "drop";
+
+ local $SIG{__DIE__}; # don't interfere with eval below
+ local $@;
+ my @c;
+ for (@{$self->{cc_conns}}) {
+ my $drop;
+ eval {
+ if (&$checker(@$_)) {
+ $self->dropping($_, $reason);
+ $drop++;
+ }
+ };
+ push(@c, $_) unless $drop;
+ }
+ @{$self->{cc_conns}} = @c;
+}
+
+
+sub prune {
+ my $self = shift;
+ $self->drop(sub { !shift->ping }, "ping");
+}
+
+
+sub get_types {
+ my $self = shift;
+ my %t;
+ $t{$_->[1]}++ for @{$self->{cc_conns}};
+ return keys %t;
+}
+
+
+sub get_connections {
+ my($self, $type) = @_;
+ my @c;
+ for (@{$self->{cc_conns}}) {
+ push(@c, $_->[0]) if !$type || ($type && $type eq $_->[1]);
+ }
+ @c;
+}
+
+
+sub _looks_like_number {
+ $_[0] =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+LWP::ConnCache - Connection cache manager
+
+=head1 NOTE
+
+This module is experimental. Details of its interface is likely to
+change in the future.
+
+=head1 SYNOPSIS
+
+ use LWP::ConnCache;
+ my $cache = LWP::ConnCache->new;
+ $cache->deposit($type, $key, $sock);
+ $sock = $cache->withdraw($type, $key);
+
+=head1 DESCRIPTION
+
+The C<LWP::ConnCache> class is the standard connection cache manager
+for LWP::UserAgent.
+
+The following basic methods are provided:
+
+=over
+
+=item $cache = LWP::ConnCache->new( %options )
+
+This method constructs a new C<LWP::ConnCache> object. The only
+option currently accepted is 'total_capacity'. If specified it
+initialize the total_capacity option. It defaults to the value 1.
+
+=item $cache->total_capacity( [$num_connections] )
+
+Get/sets the number of connection that will be cached. Connections
+will start to be dropped when this limit is reached. If set to C<0>,
+then all connections are immediately dropped. If set to C<undef>,
+then there is no limit.
+
+=item $cache->capacity($type, [$num_connections] )
+
+Get/set a limit for the number of connections of the specified type
+that can be cached. The $type will typically be a short string like
+"http" or "ftp".
+
+=item $cache->drop( [$checker, [$reason]] )
+
+Drop connections by some criteria. The $checker argument is a
+subroutine that is called for each connection. If the routine returns
+a TRUE value then the connection is dropped. The routine is called
+with ($conn, $type, $key, $deposit_time) as arguments.
+
+Shortcuts: If the $checker argument is absent (or C<undef>) all cached
+connections are dropped. If the $checker is a number then all
+connections untouched that the given number of seconds or more are
+dropped. If $checker is a string then all connections of the given
+type are dropped.
+
+The $reason argument is passed on to the dropped() method.
+
+=item $cache->prune
+
+Calling this method will drop all connections that are dead. This is
+tested by calling the ping() method on the connections. If the ping()
+method exists and returns a FALSE value, then the connection is
+dropped.
+
+=item $cache->get_types
+
+This returns all the 'type' fields used for the currently cached
+connections.
+
+=item $cache->get_connections( [$type] )
+
+This returns all connection objects of the specified type. If no type
+is specified then all connections are returned. In scalar context the
+number of cached connections of the specified type is returned.
+
+=back
+
+
+The following methods are called by low-level protocol modules to
+try to save away connections and to get them back.
+
+=over
+
+=item $cache->deposit($type, $key, $conn)
+
+This method adds a new connection to the cache. As a result other
+already cached connections might be dropped. Multiple connections with
+the same $type/$key might added.
+
+=item $conn = $cache->withdraw($type, $key)
+
+This method tries to fetch back a connection that was previously
+deposited. If no cached connection with the specified $type/$key is
+found, then C<undef> is returned. There is not guarantee that a
+deposited connection can be withdrawn, as the cache manger is free to
+drop connections at any time.
+
+=back
+
+The following methods are called internally. Subclasses might want to
+override them.
+
+=over
+
+=item $conn->enforce_limits([$type])
+
+This method is called with after a new connection is added (deposited)
+in the cache or capacity limits are adjusted. The default
+implementation drops connections until the specified capacity limits
+are not exceeded.
+
+=item $conn->dropping($conn_record, $reason)
+
+This method is called when a connection is dropped. The record
+belonging to the dropped connection is passed as the first argument
+and a string describing the reason for the drop is passed as the
+second argument. The default implementation makes some noise if the
+$LWP::ConnCache::DEBUG variable is set and nothing more.
+
+=back
+
+=head1 SUBCLASSING
+
+For specialized cache policy it makes sense to subclass
+C<LWP::ConnCache> and perhaps override the deposit(), enforce_limits()
+and dropping() methods.
+
+The object itself is a hash. Keys prefixed with C<cc_> are reserved
+for the base class.
+
+=head1 SEE ALSO
+
+L<LWP::UserAgent>
+
+=head1 COPYRIGHT
+
+Copyright 2001 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
--- /dev/null
+package LWP::Debug; # legacy
+
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT_OK = qw(level trace debug conns);
+
+use Carp ();
+
+my @levels = qw(trace debug conns);
+%current_level = ();
+
+
+sub import
+{
+ my $pack = shift;
+ my $callpkg = caller(0);
+ my @symbols = ();
+ my @levels = ();
+ for (@_) {
+ if (/^[-+]/) {
+ push(@levels, $_);
+ }
+ else {
+ push(@symbols, $_);
+ }
+ }
+ Exporter::export($pack, $callpkg, @symbols);
+ level(@levels);
+}
+
+
+sub level
+{
+ for (@_) {
+ if ($_ eq '+') { # all on
+ # switch on all levels
+ %current_level = map { $_ => 1 } @levels;
+ }
+ elsif ($_ eq '-') { # all off
+ %current_level = ();
+ }
+ elsif (/^([-+])(\w+)$/) {
+ $current_level{$2} = $1 eq '+';
+ }
+ else {
+ Carp::croak("Illegal level format $_");
+ }
+ }
+}
+
+
+sub trace { _log(@_) if $current_level{'trace'}; }
+sub debug { _log(@_) if $current_level{'debug'}; }
+sub conns { _log(@_) if $current_level{'conns'}; }
+
+
+sub _log
+{
+ my $msg = shift;
+ $msg .= "\n" unless $msg =~ /\n$/; # ensure trailing "\n"
+
+ my($package,$filename,$line,$sub) = caller(2);
+ print STDERR "$sub: $msg";
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWP::Debug - deprecated
+
+=head1 DESCRIPTION
+
+LWP::Debug used to provide tracing facilities, but these are not used
+by LWP any more. The code in this module is kept around
+(undocumented) so that 3rd party code that happen to use the old
+interfaces continue to run.
+
+One useful feature that LWP::Debug provided (in an imprecise and
+troublesome way) was network traffic monitoring. The following
+section provide some hints about recommened replacements.
+
+=head2 Network traffic monitoring
+
+The best way to monitor the network traffic that LWP generates is to
+use an external TCP monitoring program. The Wireshark program
+(L<http://www.wireshark.org/>) is higly recommended for this.
+
+Another approach it to use a debugging HTTP proxy server and make
+LWP direct all its traffic via this one. Call C<< $ua->proxy >> to
+set it up and then just use LWP as before.
+
+For less precise monitoring needs just setting up a few simple
+handlers might do. The following example sets up handlers to dump the
+request and response objects that pass through LWP:
+
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+ $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
+
+ $ua->add_handler("request_send", sub { shift->dump; return });
+ $ua->add_handler("response_done", sub { shift->dump; return });
+
+ $ua->get("http://www.example.com");
+
+=head1 SEE ALSO
+
+L<LWP::UserAgent>
--- /dev/null
+package LWP::DebugFile;
+
+# legacy stub
+
+1;
--- /dev/null
+package LWP::MediaTypes;
+
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(guess_media_type media_suffix);
+@EXPORT_OK = qw(add_type add_encoding read_media_types);
+$VERSION = "5.835";
+
+use strict;
+
+# note: These hashes will also be filled with the entries found in
+# the 'media.types' file.
+
+my %suffixType = (
+ 'txt' => 'text/plain',
+ 'html' => 'text/html',
+ 'gif' => 'image/gif',
+ 'jpg' => 'image/jpeg',
+ 'xml' => 'text/xml',
+);
+
+my %suffixExt = (
+ 'text/plain' => 'txt',
+ 'text/html' => 'html',
+ 'image/gif' => 'gif',
+ 'image/jpeg' => 'jpg',
+ 'text/xml' => 'xml',
+);
+
+#XXX: there should be some way to define this in the media.types files.
+my %suffixEncoding = (
+ 'Z' => 'compress',
+ 'gz' => 'gzip',
+ 'hqx' => 'x-hqx',
+ 'uu' => 'x-uuencode',
+ 'z' => 'x-pack',
+ 'bz2' => 'x-bzip2',
+);
+
+read_media_types();
+
+
+
+sub _dump {
+ require Data::Dumper;
+ Data::Dumper->new([\%suffixType, \%suffixExt, \%suffixEncoding],
+ [qw(*suffixType *suffixExt *suffixEncoding)])->Dump;
+}
+
+
+sub guess_media_type
+{
+ my($file, $header) = @_;
+ return undef unless defined $file;
+
+ my $fullname;
+ if (ref($file)) {
+ # assume URI object
+ $file = $file->path;
+ #XXX should handle non http:, file: or ftp: URIs differently
+ }
+ else {
+ $fullname = $file; # enable peek at actual file
+ }
+
+ my @encoding = ();
+ my $ct = undef;
+ for (file_exts($file)) {
+ # first check this dot part as encoding spec
+ if (exists $suffixEncoding{$_}) {
+ unshift(@encoding, $suffixEncoding{$_});
+ next;
+ }
+ if (exists $suffixEncoding{lc $_}) {
+ unshift(@encoding, $suffixEncoding{lc $_});
+ next;
+ }
+
+ # check content-type
+ if (exists $suffixType{$_}) {
+ $ct = $suffixType{$_};
+ last;
+ }
+ if (exists $suffixType{lc $_}) {
+ $ct = $suffixType{lc $_};
+ last;
+ }
+
+ # don't know nothing about this dot part, bail out
+ last;
+ }
+ unless (defined $ct) {
+ # Take a look at the file
+ if (defined $fullname) {
+ $ct = (-T $fullname) ? "text/plain" : "application/octet-stream";
+ }
+ else {
+ $ct = "application/octet-stream";
+ }
+ }
+
+ if ($header) {
+ $header->header('Content-Type' => $ct);
+ $header->header('Content-Encoding' => \@encoding) if @encoding;
+ }
+
+ wantarray ? ($ct, @encoding) : $ct;
+}
+
+
+sub media_suffix {
+ if (!wantarray && @_ == 1 && $_[0] !~ /\*/) {
+ return $suffixExt{lc $_[0]};
+ }
+ my(@type) = @_;
+ my(@suffix, $ext, $type);
+ foreach (@type) {
+ if (s/\*/.*/) {
+ while(($ext,$type) = each(%suffixType)) {
+ push(@suffix, $ext) if $type =~ /^$_$/i;
+ }
+ }
+ else {
+ my $ltype = lc $_;
+ while(($ext,$type) = each(%suffixType)) {
+ push(@suffix, $ext) if lc $type eq $ltype;
+ }
+ }
+ }
+ wantarray ? @suffix : $suffix[0];
+}
+
+
+sub file_exts
+{
+ require File::Basename;
+ my @parts = reverse split(/\./, File::Basename::basename($_[0]));
+ pop(@parts); # never consider first part
+ @parts;
+}
+
+
+sub add_type
+{
+ my($type, @exts) = @_;
+ for my $ext (@exts) {
+ $ext =~ s/^\.//;
+ $suffixType{$ext} = $type;
+ }
+ $suffixExt{lc $type} = $exts[0] if @exts;
+}
+
+
+sub add_encoding
+{
+ my($type, @exts) = @_;
+ for my $ext (@exts) {
+ $ext =~ s/^\.//;
+ $suffixEncoding{$ext} = $type;
+ }
+}
+
+
+sub read_media_types
+{
+ my(@files) = @_;
+
+ local($/, $_) = ("\n", undef); # ensure correct $INPUT_RECORD_SEPARATOR
+
+ my @priv_files = ();
+ if($^O eq "MacOS") {
+ push(@priv_files, "$ENV{HOME}:media.types", "$ENV{HOME}:mime.types")
+ if defined $ENV{HOME}; # Some does not have a home (for instance Win32)
+ }
+ else {
+ push(@priv_files, "$ENV{HOME}/.media.types", "$ENV{HOME}/.mime.types")
+ if defined $ENV{HOME}; # Some doesn't have a home (for instance Win32)
+ }
+
+ # Try to locate "media.types" file, and initialize %suffixType from it
+ my $typefile;
+ unless (@files) {
+ if($^O eq "MacOS") {
+ @files = map {$_."LWP:media.types"} @INC;
+ }
+ else {
+ @files = map {"$_/LWP/media.types"} @INC;
+ }
+ push @files, @priv_files;
+ }
+ for $typefile (@files) {
+ local(*TYPE);
+ open(TYPE, $typefile) || next;
+ while (<TYPE>) {
+ next if /^\s*#/; # comment line
+ next if /^\s*$/; # blank line
+ s/#.*//; # remove end-of-line comments
+ my($type, @exts) = split(' ', $_);
+ add_type($type, @exts);
+ }
+ close(TYPE);
+ }
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+LWP::MediaTypes - guess media type for a file or a URL
+
+=head1 SYNOPSIS
+
+ use LWP::MediaTypes qw(guess_media_type);
+ $type = guess_media_type("/tmp/foo.gif");
+
+=head1 DESCRIPTION
+
+This module provides functions for handling media (also known as
+MIME) types and encodings. The mapping from file extensions to media
+types is defined by the F<media.types> file. If the F<~/.media.types>
+file exists it is used instead.
+For backwards compatibility we will also look for F<~/.mime.types>.
+
+The following functions are exported by default:
+
+=over 4
+
+=item guess_media_type( $filename )
+
+=item guess_media_type( $uri )
+
+=item guess_media_type( $filename_or_uri, $header_to_modify )
+
+This function tries to guess media type and encoding for a file or a URI.
+It returns the content type, which is a string like C<"text/html">.
+In array context it also returns any content encodings applied (in the
+order used to encode the file). You can pass a URI object
+reference, instead of the file name.
+
+If the type can not be deduced from looking at the file name,
+then guess_media_type() will let the C<-T> Perl operator take a look.
+If this works (and C<-T> returns a TRUE value) then we return
+I<text/plain> as the type, otherwise we return
+I<application/octet-stream> as the type.
+
+The optional second argument should be a reference to a HTTP::Headers
+object or any object that implements the $obj->header method in a
+similar way. When it is present the values of the
+'Content-Type' and 'Content-Encoding' will be set for this header.
+
+=item media_suffix( $type, ... )
+
+This function will return all suffixes that can be used to denote the
+specified media type(s). Wildcard types can be used. In a scalar
+context it will return the first suffix found. Examples:
+
+ @suffixes = media_suffix('image/*', 'audio/basic');
+ $suffix = media_suffix('text/html');
+
+=back
+
+The following functions are only exported by explicit request:
+
+=over 4
+
+=item add_type( $type, @exts )
+
+Associate a list of file extensions with the given media type.
+Example:
+
+ add_type("x-world/x-vrml" => qw(wrl vrml));
+
+=item add_encoding( $type, @ext )
+
+Associate a list of file extensions with an encoding type.
+Example:
+
+ add_encoding("x-gzip" => "gz");
+
+=item read_media_types( @files )
+
+Parse media types files and add the type mappings found there.
+Example:
+
+ read_media_types("conf/mime.types");
+
+=back
+
+=head1 COPYRIGHT
+
+Copyright 1995-1999 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package LWP::MemberMixin;
+
+sub _elem
+{
+ my $self = shift;
+ my $elem = shift;
+ my $old = $self->{$elem};
+ $self->{$elem} = shift if @_;
+ return $old;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWP::MemberMixin - Member access mixin class
+
+=head1 SYNOPSIS
+
+ package Foo;
+ require LWP::MemberMixin;
+ @ISA=qw(LWP::MemberMixin);
+
+=head1 DESCRIPTION
+
+A mixin class to get methods that provide easy access to member
+variables in the %$self.
+Ideally there should be better Perl language support for this.
+
+There is only one method provided:
+
+=over 4
+
+=item _elem($elem [, $val])
+
+Internal method to get/set the value of member variable
+C<$elem>. If C<$val> is present it is used as the new value
+for the member variable. If it is not present the current
+value is not touched. In both cases the previous value of
+the member variable is returned.
+
+=back
--- /dev/null
+package LWP::Protocol;
+
+require LWP::MemberMixin;
+@ISA = qw(LWP::MemberMixin);
+$VERSION = "5.829";
+
+use strict;
+use Carp ();
+use HTTP::Status ();
+use HTTP::Response;
+
+my %ImplementedBy = (); # scheme => classname
+
+
+
+sub new
+{
+ my($class, $scheme, $ua) = @_;
+
+ my $self = bless {
+ scheme => $scheme,
+ ua => $ua,
+
+ # historical/redundant
+ max_size => $ua->{max_size},
+ }, $class;
+
+ $self;
+}
+
+
+sub create
+{
+ my($scheme, $ua) = @_;
+ my $impclass = LWP::Protocol::implementor($scheme) or
+ Carp::croak("Protocol scheme '$scheme' is not supported");
+
+ # hand-off to scheme specific implementation sub-class
+ my $protocol = $impclass->new($scheme, $ua);
+
+ return $protocol;
+}
+
+
+sub implementor
+{
+ my($scheme, $impclass) = @_;
+
+ if ($impclass) {
+ $ImplementedBy{$scheme} = $impclass;
+ }
+ my $ic = $ImplementedBy{$scheme};
+ return $ic if $ic;
+
+ return '' unless $scheme =~ /^([.+\-\w]+)$/; # check valid URL schemes
+ $scheme = $1; # untaint
+ $scheme =~ s/[.+\-]/_/g; # make it a legal module name
+
+ # scheme not yet known, look for a 'use'd implementation
+ $ic = "LWP::Protocol::$scheme"; # default location
+ $ic = "LWP::Protocol::nntp" if $scheme eq 'news'; #XXX ugly hack
+ no strict 'refs';
+ # check we actually have one for the scheme:
+ unless (@{"${ic}::ISA"}) {
+ # try to autoload it
+ eval "require $ic";
+ if ($@) {
+ if ($@ =~ /Can't locate/) { #' #emacs get confused by '
+ $ic = '';
+ }
+ else {
+ die "$@\n";
+ }
+ }
+ }
+ $ImplementedBy{$scheme} = $ic if $ic;
+ $ic;
+}
+
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+ Carp::croak('LWP::Protocol::request() needs to be overridden in subclasses');
+}
+
+
+# legacy
+sub timeout { shift->_elem('timeout', @_); }
+sub max_size { shift->_elem('max_size', @_); }
+
+
+sub collect
+{
+ my ($self, $arg, $response, $collector) = @_;
+ my $content;
+ my($ua, $max_size) = @{$self}{qw(ua max_size)};
+
+ eval {
+ local $\; # protect the print below from surprises
+ if (!defined($arg) || !$response->is_success) {
+ $response->{default_add_content} = 1;
+ }
+ elsif (!ref($arg) && length($arg)) {
+ open(my $fh, ">", $arg) or die "Can't write to '$arg': $!";
+ binmode($fh);
+ push(@{$response->{handlers}{response_data}}, {
+ callback => sub {
+ print $fh $_[3] or die "Can't write to '$arg': $!";
+ 1;
+ },
+ });
+ push(@{$response->{handlers}{response_done}}, {
+ callback => sub {
+ close($fh) or die "Can't write to '$arg': $!";
+ undef($fh);
+ },
+ });
+ }
+ elsif (ref($arg) eq 'CODE') {
+ push(@{$response->{handlers}{response_data}}, {
+ callback => sub {
+ &$arg($_[3], $_[0], $self);
+ 1;
+ },
+ });
+ }
+ else {
+ die "Unexpected collect argument '$arg'";
+ }
+
+ $ua->run_handlers("response_header", $response);
+
+ if (delete $response->{default_add_content}) {
+ push(@{$response->{handlers}{response_data}}, {
+ callback => sub {
+ $_[0]->add_content($_[3]);
+ 1;
+ },
+ });
+ }
+
+
+ my $content_size = 0;
+ my $length = $response->content_length;
+ my %skip_h;
+
+ while ($content = &$collector, length $$content) {
+ for my $h ($ua->handlers("response_data", $response)) {
+ next if $skip_h{$h};
+ unless ($h->{callback}->($response, $ua, $h, $$content)) {
+ # XXX remove from $response->{handlers}{response_data} if present
+ $skip_h{$h}++;
+ }
+ }
+ $content_size += length($$content);
+ $ua->progress(($length ? ($content_size / $length) : "tick"), $response);
+ if (defined($max_size) && $content_size > $max_size) {
+ $response->push_header("Client-Aborted", "max_size");
+ last;
+ }
+ }
+ };
+ my $err = $@;
+ delete $response->{handlers}{response_data};
+ delete $response->{handlers} unless %{$response->{handlers}};
+ if ($err) {
+ chomp($err);
+ $response->push_header('X-Died' => $err);
+ $response->push_header("Client-Aborted", "die");
+ return $response;
+ }
+
+ return $response;
+}
+
+
+sub collect_once
+{
+ my($self, $arg, $response) = @_;
+ my $content = \ $_[3];
+ my $first = 1;
+ $self->collect($arg, $response, sub {
+ return $content if $first--;
+ return \ "";
+ });
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+LWP::Protocol - Base class for LWP protocols
+
+=head1 SYNOPSIS
+
+ package LWP::Protocol::foo;
+ require LWP::Protocol;
+ @ISA=qw(LWP::Protocol);
+
+=head1 DESCRIPTION
+
+This class is used a the base class for all protocol implementations
+supported by the LWP library.
+
+When creating an instance of this class using
+C<LWP::Protocol::create($url)>, and you get an initialised subclass
+appropriate for that access method. In other words, the
+LWP::Protocol::create() function calls the constructor for one of its
+subclasses.
+
+All derived LWP::Protocol classes need to override the request()
+method which is used to service a request. The overridden method can
+make use of the collect() function to collect together chunks of data
+as it is received.
+
+The following methods and functions are provided:
+
+=over 4
+
+=item $prot = LWP::Protocol->new()
+
+The LWP::Protocol constructor is inherited by subclasses. As this is a
+virtual base class this method should B<not> be called directly.
+
+=item $prot = LWP::Protocol::create($scheme)
+
+Create an object of the class implementing the protocol to handle the
+given scheme. This is a function, not a method. It is more an object
+factory than a constructor. This is the function user agents should
+use to access protocols.
+
+=item $class = LWP::Protocol::implementor($scheme, [$class])
+
+Get and/or set implementor class for a scheme. Returns '' if the
+specified scheme is not supported.
+
+=item $prot->request(...)
+
+ $response = $protocol->request($request, $proxy, undef);
+ $response = $protocol->request($request, $proxy, '/tmp/sss');
+ $response = $protocol->request($request, $proxy, \&callback, 1024);
+
+Dispatches a request over the protocol, and returns a response
+object. This method needs to be overridden in subclasses. Refer to
+L<LWP::UserAgent> for description of the arguments.
+
+=item $prot->collect($arg, $response, $collector)
+
+Called to collect the content of a request, and process it
+appropriately into a scalar, file, or by calling a callback. If $arg
+is undefined, then the content is stored within the $response. If
+$arg is a simple scalar, then $arg is interpreted as a file name and
+the content is written to this file. If $arg is a reference to a
+routine, then content is passed to this routine.
+
+The $collector is a routine that will be called and which is
+responsible for returning pieces (as ref to scalar) of the content to
+process. The $collector signals EOF by returning a reference to an
+empty sting.
+
+The return value from collect() is the $response object reference.
+
+B<Note:> We will only use the callback or file argument if
+$response->is_success(). This avoids sending content data for
+redirects and authentication responses to the callback which would be
+confusing.
+
+=item $prot->collect_once($arg, $response, $content)
+
+Can be called when the whole response content is available as
+$content. This will invoke collect() with a collector callback that
+returns a reference to $content the first time and an empty string the
+next.
+
+=back
+
+=head1 SEE ALSO
+
+Inspect the F<LWP/Protocol/file.pm> and F<LWP/Protocol/http.pm> files
+for examples of usage.
+
+=head1 COPYRIGHT
+
+Copyright 1995-2001 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
--- /dev/null
+package LWP::Protocol::GHTTP;
+
+# You can tell LWP to use this module for 'http' requests by running
+# code like this before you make requests:
+#
+# require LWP::Protocol::GHTTP;
+# LWP::Protocol::implementor('http', 'LWP::Protocol::GHTTP');
+#
+
+use strict;
+use vars qw(@ISA);
+
+require LWP::Protocol;
+@ISA=qw(LWP::Protocol);
+
+require HTTP::Response;
+require HTTP::Status;
+
+use HTTP::GHTTP qw(METHOD_GET METHOD_HEAD METHOD_POST);
+
+my %METHOD =
+(
+ GET => METHOD_GET,
+ HEAD => METHOD_HEAD,
+ POST => METHOD_POST,
+);
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ my $method = $request->method;
+ unless (exists $METHOD{$method}) {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "Bad method '$method'");
+ }
+
+ my $r = HTTP::GHTTP->new($request->uri);
+
+ # XXX what headers for repeated headers here?
+ $request->headers->scan(sub { $r->set_header(@_)});
+
+ $r->set_type($METHOD{$method});
+
+ # XXX should also deal with subroutine content.
+ my $cref = $request->content_ref;
+ $r->set_body($$cref) if length($$cref);
+
+ # XXX is this right
+ $r->set_proxy($proxy->as_string) if $proxy;
+
+ $r->process_request;
+
+ my $response = HTTP::Response->new($r->get_status);
+
+ # XXX How can get the headers out of $r?? This way is too stupid.
+ my @headers;
+ eval {
+ # Wrapped in eval because this method is not always available
+ @headers = $r->get_headers;
+ };
+ @headers = qw(Date Connection Server Content-type
+ Accept-Ranges Server
+ Content-Length Last-Modified ETag) if $@;
+ for (@headers) {
+ my $v = $r->get_header($_);
+ $response->header($_ => $v) if defined $v;
+ }
+
+ return $self->collect_once($arg, $response, $r->get_body);
+}
+
+1;
--- /dev/null
+package LWP::Protocol::cpan;
+
+use strict;
+use vars qw(@ISA);
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+require URI;
+require HTTP::Status;
+require HTTP::Response;
+
+our $CPAN;
+
+unless ($CPAN) {
+ # Try to find local CPAN mirror via $CPAN::Config
+ eval {
+ require CPAN::Config;
+ if($CPAN::Config) {
+ my $urls = $CPAN::Config->{urllist};
+ if (ref($urls) eq "ARRAY") {
+ my $file;
+ for (@$urls) {
+ if (/^file:/) {
+ $file = $_;
+ last;
+ }
+ }
+
+ if ($file) {
+ $CPAN = $file;
+ }
+ else {
+ $CPAN = $urls->[0];
+ }
+ }
+ }
+ };
+
+ $CPAN ||= "http://cpan.org/"; # last resort
+}
+
+# ensure that we don't chop of last part
+$CPAN .= "/" unless $CPAN =~ m,/$,;
+
+
+sub request {
+ my($self, $request, $proxy, $arg, $size) = @_;
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy with cpan');
+ }
+
+ # check method
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'cpan:' URLs");
+ }
+
+ my $path = $request->uri->path;
+ $path =~ s,^/,,;
+
+ my $response = HTTP::Response->new(&HTTP::Status::RC_FOUND);
+ $response->header("Location" => URI->new_abs($path, $CPAN));
+ $response;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::data;
+
+# Implements access to data:-URLs as specified in RFC 2397
+
+use strict;
+use vars qw(@ISA);
+
+require HTTP::Response;
+require HTTP::Status;
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+use HTTP::Date qw(time2str);
+require LWP; # needs version number
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size) = @_;
+
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy with data');
+ }
+
+ # check method
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD') {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'data:' URLs");
+ }
+
+ my $url = $request->uri;
+ my $response = HTTP::Response->new( &HTTP::Status::RC_OK, "Document follows");
+
+ my $media_type = $url->media_type;
+
+ my $data = $url->data;
+ $response->header('Content-Type' => $media_type,
+ 'Content-Length' => length($data),
+ 'Date' => time2str(time),
+ 'Server' => "libwww-perl-internal/$LWP::VERSION"
+ );
+
+ $data = "" if $method eq "HEAD";
+ return $self->collect_once($arg, $response, $data);
+}
+
+1;
--- /dev/null
+package LWP::Protocol::file;
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+use strict;
+
+require LWP::MediaTypes;
+require HTTP::Request;
+require HTTP::Response;
+require HTTP::Status;
+require HTTP::Date;
+
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size) = @_;
+
+ $size = 4096 unless defined $size and $size > 0;
+
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy through the filesystem');
+ }
+
+ # check method
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD') {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'file:' URLs");
+ }
+
+ # check url
+ my $url = $request->uri;
+
+ my $scheme = $url->scheme;
+ if ($scheme ne 'file') {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "LWP::Protocol::file::request called for '$scheme'");
+ }
+
+ # URL OK, look at file
+ my $path = $url->file;
+
+ # test file exists and is readable
+ unless (-e $path) {
+ return HTTP::Response->new( &HTTP::Status::RC_NOT_FOUND,
+ "File `$path' does not exist");
+ }
+ unless (-r _) {
+ return HTTP::Response->new( &HTTP::Status::RC_FORBIDDEN,
+ 'User does not have read permission');
+ }
+
+ # looks like file exists
+ my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$filesize,
+ $atime,$mtime,$ctime,$blksize,$blocks)
+ = stat(_);
+
+ # XXX should check Accept headers?
+
+ # check if-modified-since
+ my $ims = $request->header('If-Modified-Since');
+ if (defined $ims) {
+ my $time = HTTP::Date::str2time($ims);
+ if (defined $time and $time >= $mtime) {
+ return HTTP::Response->new( &HTTP::Status::RC_NOT_MODIFIED,
+ "$method $path");
+ }
+ }
+
+ # Ok, should be an OK response by now...
+ my $response = HTTP::Response->new( &HTTP::Status::RC_OK );
+
+ # fill in response headers
+ $response->header('Last-Modified', HTTP::Date::time2str($mtime));
+
+ if (-d _) { # If the path is a directory, process it
+ # generate the HTML for directory
+ opendir(D, $path) or
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Cannot read directory '$path': $!");
+ my(@files) = sort readdir(D);
+ closedir(D);
+
+ # Make directory listing
+ require URI::Escape;
+ require HTML::Entities;
+ my $pathe = $path . ( $^O eq 'MacOS' ? ':' : '/');
+ for (@files) {
+ my $furl = URI::Escape::uri_escape($_);
+ if ( -d "$pathe$_" ) {
+ $furl .= '/';
+ $_ .= '/';
+ }
+ my $desc = HTML::Entities::encode($_);
+ $_ = qq{<LI><A HREF="$furl">$desc</A>};
+ }
+ # Ensure that the base URL is "/" terminated
+ my $base = $url->clone;
+ unless ($base->path =~ m|/$|) {
+ $base->path($base->path . "/");
+ }
+ my $html = join("\n",
+ "<HTML>\n<HEAD>",
+ "<TITLE>Directory $path</TITLE>",
+ "<BASE HREF=\"$base\">",
+ "</HEAD>\n<BODY>",
+ "<H1>Directory listing of $path</H1>",
+ "<UL>", @files, "</UL>",
+ "</BODY>\n</HTML>\n");
+
+ $response->header('Content-Type', 'text/html');
+ $response->header('Content-Length', length $html);
+ $html = "" if $method eq "HEAD";
+
+ return $self->collect_once($arg, $response, $html);
+
+ }
+
+ # path is a regular file
+ $response->header('Content-Length', $filesize);
+ LWP::MediaTypes::guess_media_type($path, $response);
+
+ # read the file
+ if ($method ne "HEAD") {
+ open(F, $path) or return new
+ HTTP::Response(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Cannot read file '$path': $!");
+ binmode(F);
+ $response = $self->collect($arg, $response, sub {
+ my $content = "";
+ my $bytes = sysread(F, $content, $size);
+ return \$content if $bytes > 0;
+ return \ "";
+ });
+ close(F);
+ }
+
+ $response;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::ftp;
+
+# Implementation of the ftp protocol (RFC 959). We let the Net::FTP
+# package do all the dirty work.
+
+use Carp ();
+
+use HTTP::Status ();
+use HTTP::Negotiate ();
+use HTTP::Response ();
+use LWP::MediaTypes ();
+use File::Listing ();
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+use strict;
+eval {
+ package LWP::Protocol::MyFTP;
+
+ require Net::FTP;
+ Net::FTP->require_version(2.00);
+
+ use vars qw(@ISA);
+ @ISA=qw(Net::FTP);
+
+ sub new {
+ my $class = shift;
+
+ my $self = $class->SUPER::new(@_) || return undef;
+
+ my $mess = $self->message; # welcome message
+ $mess =~ s|\n.*||s; # only first line left
+ $mess =~ s|\s*ready\.?$||;
+ # Make the version number more HTTP like
+ $mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||;
+ ${*$self}{myftp_server} = $mess;
+ #$response->header("Server", $mess);
+
+ $self;
+ }
+
+ sub http_server {
+ my $self = shift;
+ ${*$self}{myftp_server};
+ }
+
+ sub home {
+ my $self = shift;
+ my $old = ${*$self}{myftp_home};
+ if (@_) {
+ ${*$self}{myftp_home} = shift;
+ }
+ $old;
+ }
+
+ sub go_home {
+ my $self = shift;
+ $self->cwd(${*$self}{myftp_home});
+ }
+
+ sub request_count {
+ my $self = shift;
+ ++${*$self}{myftp_reqcount};
+ }
+
+ sub ping {
+ my $self = shift;
+ return $self->go_home;
+ }
+
+};
+my $init_failed = $@;
+
+
+sub _connect {
+ my($self, $host, $port, $user, $account, $password, $timeout) = @_;
+
+ my $key;
+ my $conn_cache = $self->{ua}{conn_cache};
+ if ($conn_cache) {
+ $key = "$host:$port:$user";
+ $key .= ":$account" if defined($account);
+ if (my $ftp = $conn_cache->withdraw("ftp", $key)) {
+ if ($ftp->ping) {
+ # save it again
+ $conn_cache->deposit("ftp", $key, $ftp);
+ return $ftp;
+ }
+ }
+ }
+
+ # try to make a connection
+ my $ftp = LWP::Protocol::MyFTP->new($host,
+ Port => $port,
+ Timeout => $timeout,
+ LocalAddr => $self->{ua}{local_address},
+ );
+ # XXX Should be some what to pass on 'Passive' (header??)
+ unless ($ftp) {
+ $@ =~ s/^Net::FTP: //;
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $@);
+ }
+
+ unless ($ftp->login($user, $password, $account)) {
+ # Unauthorized. Let's fake a RC_UNAUTHORIZED response
+ my $mess = scalar($ftp->message);
+ $mess =~ s/\n$//;
+ my $res = HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, $mess);
+ $res->header("Server", $ftp->http_server);
+ $res->header("WWW-Authenticate", qq(Basic Realm="FTP login"));
+ return $res;
+ }
+
+ my $home = $ftp->pwd;
+ $ftp->home($home);
+
+ $conn_cache->deposit("ftp", $key, $ftp) if $conn_cache;
+
+ return $ftp;
+}
+
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ $size = 4096 unless $size;
+
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy through the ftp');
+ }
+
+ my $url = $request->uri;
+ if ($url->scheme ne 'ftp') {
+ my $scheme = $url->scheme;
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "LWP::Protocol::ftp::request called for '$scheme'");
+ }
+
+ # check method
+ my $method = $request->method;
+
+ unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'ftp:' URLs");
+ }
+
+ if ($init_failed) {
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ $init_failed);
+ }
+
+ my $host = $url->host;
+ my $port = $url->port;
+ my $user = $url->user;
+ my $password = $url->password;
+
+ # If a basic autorization header is present than we prefer these over
+ # the username/password specified in the URL.
+ {
+ my($u,$p) = $request->authorization_basic;
+ if (defined $u) {
+ $user = $u;
+ $password = $p;
+ }
+ }
+
+ # We allow the account to be specified in the "Account" header
+ my $account = $request->header('Account');
+
+ my $ftp = $self->_connect($host, $port, $user, $account, $password, $timeout);
+ return $ftp if ref($ftp) eq "HTTP::Response"; # ugh!
+
+ # Create an initial response object
+ my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
+ $response->header(Server => $ftp->http_server);
+ $response->header('Client-Request-Num' => $ftp->request_count);
+ $response->request($request);
+
+ # Get & fix the path
+ my @path = grep { length } $url->path_segments;
+ my $remote_file = pop(@path);
+ $remote_file = '' unless defined $remote_file;
+
+ my $type;
+ if (ref $remote_file) {
+ my @params;
+ ($remote_file, @params) = @$remote_file;
+ for (@params) {
+ $type = $_ if s/^type=//;
+ }
+ }
+
+ if ($type && $type eq 'a') {
+ $ftp->ascii;
+ }
+ else {
+ $ftp->binary;
+ }
+
+ for (@path) {
+ unless ($ftp->cwd($_)) {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND,
+ "Can't chdir to $_");
+ }
+ }
+
+ if ($method eq 'GET' || $method eq 'HEAD') {
+ if (my $mod_time = $ftp->mdtm($remote_file)) {
+ $response->last_modified($mod_time);
+ if (my $ims = $request->if_modified_since) {
+ if ($mod_time <= $ims) {
+ $response->code(&HTTP::Status::RC_NOT_MODIFIED);
+ $response->message("Not modified");
+ return $response;
+ }
+ }
+ }
+
+ # We'll use this later to abort the transfer if necessary.
+ # if $max_size is defined, we need to abort early. Otherwise, it's
+ # a normal transfer
+ my $max_size = undef;
+
+ # Set resume location, if the client requested it
+ if ($request->header('Range') && $ftp->supported('REST'))
+ {
+ my $range_info = $request->header('Range');
+
+ # Change bytes=2772992-6781209 to just 2772992
+ my ($start_byte,$end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)?/;
+ if ( defined $start_byte && !defined $end_byte ) {
+
+ # open range -- only the start is specified
+
+ $ftp->restart( $start_byte );
+ # don't define $max_size, we don't want to abort early
+ }
+ elsif ( defined $start_byte && defined $end_byte &&
+ $start_byte >= 0 && $end_byte >= $start_byte ) {
+
+ $ftp->restart( $start_byte );
+ $max_size = $end_byte - $start_byte;
+ }
+ else {
+
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Incorrect syntax for Range request');
+ }
+ }
+ elsif ($request->header('Range') && !$ftp->supported('REST'))
+ {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED,
+ "Server does not support resume.");
+ }
+
+ my $data; # the data handle
+ if (length($remote_file) and $data = $ftp->retr($remote_file)) {
+ my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file);
+ $response->header('Content-Type', $type) if $type;
+ for (@enc) {
+ $response->push_header('Content-Encoding', $_);
+ }
+ my $mess = $ftp->message;
+ if ($mess =~ /\((\d+)\s+bytes\)/) {
+ $response->header('Content-Length', "$1");
+ }
+
+ if ($method ne 'HEAD') {
+ # Read data from server
+ $response = $self->collect($arg, $response, sub {
+ my $content = '';
+ my $result = $data->read($content, $size);
+
+ # Stop early if we need to.
+ if (defined $max_size)
+ {
+ # We need an interface to Net::FTP::dataconn for getting
+ # the number of bytes already read
+ my $bytes_received = $data->bytes_read();
+
+ # We were already over the limit. (Should only happen
+ # once at the end.)
+ if ($bytes_received - length($content) > $max_size)
+ {
+ $content = '';
+ }
+ # We just went over the limit
+ elsif ($bytes_received > $max_size)
+ {
+ # Trim content
+ $content = substr($content, 0,
+ $max_size - ($bytes_received - length($content)) );
+ }
+ # We're under the limit
+ else
+ {
+ }
+ }
+
+ return \$content;
+ } );
+ }
+ # abort is needed for HEAD, it's == close if the transfer has
+ # already completed.
+ unless ($data->abort) {
+ # Something did not work too well. Note that we treat
+ # responses to abort() with code 0 in case of HEAD as ok
+ # (at least wu-ftpd 2.6.1(1) does that).
+ if ($method ne 'HEAD' || $ftp->code != 0) {
+ $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
+ $response->message("FTP close response: " . $ftp->code .
+ " " . $ftp->message);
+ }
+ }
+ }
+ elsif (!length($remote_file) || ( $ftp->code >= 400 && $ftp->code < 600 )) {
+ # not a plain file, try to list instead
+ if (length($remote_file) && !$ftp->cwd($remote_file)) {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND,
+ "File '$remote_file' not found");
+ }
+
+ # It should now be safe to try to list the directory
+ my @lsl = $ftp->dir;
+
+ # Try to figure out if the user want us to convert the
+ # directory listing to HTML.
+ my @variants =
+ (
+ ['html', 0.60, 'text/html' ],
+ ['dir', 1.00, 'text/ftp-dir-listing' ]
+ );
+ #$HTTP::Negotiate::DEBUG=1;
+ my $prefer = HTTP::Negotiate::choose(\@variants, $request);
+
+ my $content = '';
+
+ if (!defined($prefer)) {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_ACCEPTABLE,
+ "Neither HTML nor directory listing wanted");
+ }
+ elsif ($prefer eq 'html') {
+ $response->header('Content-Type' => 'text/html');
+ $content = "<HEAD><TITLE>File Listing</TITLE>\n";
+ my $base = $request->uri->clone;
+ my $path = $base->path;
+ $base->path("$path/") unless $path =~ m|/$|;
+ $content .= qq(<BASE HREF="$base">\n</HEAD>\n);
+ $content .= "<BODY>\n<UL>\n";
+ for (File::Listing::parse_dir(\@lsl, 'GMT')) {
+ my($name, $type, $size, $mtime, $mode) = @$_;
+ $content .= qq( <LI> <a href="$name">$name</a>);
+ $content .= " $size bytes" if $type eq 'f';
+ $content .= "\n";
+ }
+ $content .= "</UL></body>\n";
+ }
+ else {
+ $response->header('Content-Type', 'text/ftp-dir-listing');
+ $content = join("\n", @lsl, '');
+ }
+
+ $response->header('Content-Length', length($content));
+
+ if ($method ne 'HEAD') {
+ $response = $self->collect_once($arg, $response, $content);
+ }
+ }
+ else {
+ my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "FTP return code " . $ftp->code);
+ $res->content_type("text/plain");
+ $res->content($ftp->message);
+ return $res;
+ }
+ }
+ elsif ($method eq 'PUT') {
+ # method must be PUT
+ unless (length($remote_file)) {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "Must have a file name to PUT to");
+ }
+ my $data;
+ if ($data = $ftp->stor($remote_file)) {
+ my $content = $request->content;
+ my $bytes = 0;
+ if (defined $content) {
+ if (ref($content) eq 'SCALAR') {
+ $bytes = $data->write($$content, length($$content));
+ }
+ elsif (ref($content) eq 'CODE') {
+ my($buf, $n);
+ while (length($buf = &$content)) {
+ $n = $data->write($buf, length($buf));
+ last unless $n;
+ $bytes += $n;
+ }
+ }
+ elsif (!ref($content)) {
+ if (defined $content && length($content)) {
+ $bytes = $data->write($content, length($content));
+ }
+ }
+ else {
+ die "Bad content";
+ }
+ }
+ $data->close;
+
+ $response->code(&HTTP::Status::RC_CREATED);
+ $response->header('Content-Type', 'text/plain');
+ $response->content("$bytes bytes stored as $remote_file on $host\n")
+
+ }
+ else {
+ my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "FTP return code " . $ftp->code);
+ $res->content_type("text/plain");
+ $res->content($ftp->message);
+ return $res;
+ }
+ }
+ else {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "Illegal method $method");
+ }
+
+ $response;
+}
+
+1;
+
+__END__
+
+# This is what RFC 1738 has to say about FTP access:
+# --------------------------------------------------
+#
+# 3.2. FTP
+#
+# The FTP URL scheme is used to designate files and directories on
+# Internet hosts accessible using the FTP protocol (RFC959).
+#
+# A FTP URL follow the syntax described in Section 3.1. If :<port> is
+# omitted, the port defaults to 21.
+#
+# 3.2.1. FTP Name and Password
+#
+# A user name and password may be supplied; they are used in the ftp
+# "USER" and "PASS" commands after first making the connection to the
+# FTP server. If no user name or password is supplied and one is
+# requested by the FTP server, the conventions for "anonymous" FTP are
+# to be used, as follows:
+#
+# The user name "anonymous" is supplied.
+#
+# The password is supplied as the Internet e-mail address
+# of the end user accessing the resource.
+#
+# If the URL supplies a user name but no password, and the remote
+# server requests a password, the program interpreting the FTP URL
+# should request one from the user.
+#
+# 3.2.2. FTP url-path
+#
+# The url-path of a FTP URL has the following syntax:
+#
+# <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode>
+#
+# Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings
+# and <typecode> is one of the characters "a", "i", or "d". The part
+# ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be
+# empty. The whole url-path may be omitted, including the "/"
+# delimiting it from the prefix containing user, password, host, and
+# port.
+#
+# The url-path is interpreted as a series of FTP commands as follows:
+#
+# Each of the <cwd> elements is to be supplied, sequentially, as the
+# argument to a CWD (change working directory) command.
+#
+# If the typecode is "d", perform a NLST (name list) command with
+# <name> as the argument, and interpret the results as a file
+# directory listing.
+#
+# Otherwise, perform a TYPE command with <typecode> as the argument,
+# and then access the file whose name is <name> (for example, using
+# the RETR command.)
+#
+# Within a name or CWD component, the characters "/" and ";" are
+# reserved and must be encoded. The components are decoded prior to
+# their use in the FTP protocol. In particular, if the appropriate FTP
+# sequence to access a particular file requires supplying a string
+# containing a "/" as an argument to a CWD or RETR command, it is
+# necessary to encode each "/".
+#
+# For example, the URL <URL:ftp://myname@host.dom/%2Fetc/motd> is
+# interpreted by FTP-ing to "host.dom", logging in as "myname"
+# (prompting for a password if it is asked for), and then executing
+# "CWD /etc" and then "RETR motd". This has a different meaning from
+# <URL:ftp://myname@host.dom/etc/motd> which would "CWD etc" and then
+# "RETR motd"; the initial "CWD" might be executed relative to the
+# default directory for "myname". On the other hand,
+# <URL:ftp://myname@host.dom//etc/motd>, would "CWD " with a null
+# argument, then "CWD etc", and then "RETR motd".
+#
+# FTP URLs may also be used for other operations; for example, it is
+# possible to update a file on a remote file server, or infer
+# information about it from the directory listings. The mechanism for
+# doing so is not spelled out here.
+#
+# 3.2.3. FTP Typecode is Optional
+#
+# The entire ;type=<typecode> part of a FTP URL is optional. If it is
+# omitted, the client program interpreting the URL must guess the
+# appropriate mode to use. In general, the data content type of a file
+# can only be guessed from the name, e.g., from the suffix of the name;
+# the appropriate type code to be used for transfer of the file can
+# then be deduced from the data content of the file.
+#
+# 3.2.4 Hierarchy
+#
+# For some file systems, the "/" used to denote the hierarchical
+# structure of the URL corresponds to the delimiter used to construct a
+# file name hierarchy, and thus, the filename will look similar to the
+# URL path. This does NOT mean that the URL is a Unix filename.
+#
+# 3.2.5. Optimization
+#
+# Clients accessing resources via FTP may employ additional heuristics
+# to optimize the interaction. For some FTP servers, for example, it
+# may be reasonable to keep the control connection open while accessing
+# multiple URLs from the same server. However, there is no common
+# hierarchical model to the FTP protocol, so if a directory change
+# command has been given, it is impossible in general to deduce what
+# sequence should be given to navigate to another directory for a
+# second retrieval, if the paths are different. The only reliable
+# algorithm is to disconnect and reestablish the control connection.
--- /dev/null
+package LWP::Protocol::gopher;
+
+# Implementation of the gopher protocol (RFC 1436)
+#
+# This code is based on 'wwwgopher.pl,v 0.10 1994/10/17 18:12:34 shelden'
+# which in turn is a vastly modified version of Oscar's http'get()
+# dated 28/3/94 in <ftp://cui.unige.ch/PUBLIC/oscar/scripts/http.pl>
+# including contributions from Marc van Heyningen and Martijn Koster.
+
+use strict;
+use vars qw(@ISA);
+
+require HTTP::Response;
+require HTTP::Status;
+require IO::Socket;
+require IO::Select;
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+
+my %gopher2mimetype = (
+ '0' => 'text/plain', # 0 file
+ '1' => 'text/html', # 1 menu
+ # 2 CSO phone-book server
+ # 3 Error
+ '4' => 'application/mac-binhex40', # 4 BinHexed Macintosh file
+ '5' => 'application/zip', # 5 DOS binary archive of some sort
+ '6' => 'application/octet-stream', # 6 UNIX uuencoded file.
+ '7' => 'text/html', # 7 Index-Search server
+ # 8 telnet session
+ '9' => 'application/octet-stream', # 9 binary file
+ 'h' => 'text/html', # html
+ 'g' => 'image/gif', # gif
+ 'I' => 'image/*', # some kind of image
+);
+
+my %gopher2encoding = (
+ '6' => 'x_uuencode', # 6 UNIX uuencoded file.
+);
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ $size = 4096 unless $size;
+
+ # check proxy
+ if (defined $proxy) {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy through the gopher');
+ }
+
+ my $url = $request->uri;
+ die "bad scheme" if $url->scheme ne 'gopher';
+
+
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'gopher:' URLs");
+ }
+
+ my $gophertype = $url->gopher_type;
+ unless (exists $gopher2mimetype{$gophertype}) {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED,
+ 'Library does not support gophertype ' .
+ $gophertype);
+ }
+
+ my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
+ $response->header('Content-type' => $gopher2mimetype{$gophertype}
+ || 'text/plain');
+ $response->header('Content-Encoding' => $gopher2encoding{$gophertype})
+ if exists $gopher2encoding{$gophertype};
+
+ if ($method eq 'HEAD') {
+ # XXX: don't even try it so we set this header
+ $response->header('Client-Warning' => 'Client answer only');
+ return $response;
+ }
+
+ if ($gophertype eq '7' && ! $url->search) {
+ # the url is the prompt for a gopher search; supply boiler-plate
+ return $self->collect_once($arg, $response, <<"EOT");
+<HEAD>
+<TITLE>Gopher Index</TITLE>
+<ISINDEX>
+</HEAD>
+<BODY>
+<H1>$url<BR>Gopher Search</H1>
+This is a searchable Gopher index.
+Use the search function of your browser to enter search terms.
+</BODY>
+EOT
+ }
+
+ my $host = $url->host;
+ my $port = $url->port;
+
+ my $requestLine = "";
+
+ my $selector = $url->selector;
+ if (defined $selector) {
+ $requestLine .= $selector;
+ my $search = $url->search;
+ if (defined $search) {
+ $requestLine .= "\t$search";
+ my $string = $url->string;
+ if (defined $string) {
+ $requestLine .= "\t$string";
+ }
+ }
+ }
+ $requestLine .= "\015\012";
+
+ # potential request headers are just ignored
+
+ # Ok, lets make the request
+ my $socket = IO::Socket::INET->new(PeerAddr => $host,
+ PeerPort => $port,
+ LocalAddr => $self->{ua}{local_address},
+ Proto => 'tcp',
+ Timeout => $timeout);
+ die "Can't connect to $host:$port" unless $socket;
+ my $sel = IO::Select->new($socket);
+
+ {
+ die "write timeout" if $timeout && !$sel->can_write($timeout);
+ my $n = syswrite($socket, $requestLine, length($requestLine));
+ die $! unless defined($n);
+ die "short write" if $n != length($requestLine);
+ }
+
+ my $user_arg = $arg;
+
+ # must handle menus in a special way since they are to be
+ # converted to HTML. Undefing $arg ensures that the user does
+ # not see the data before we get a change to convert it.
+ $arg = undef if $gophertype eq '1' || $gophertype eq '7';
+
+ # collect response
+ my $buf = '';
+ $response = $self->collect($arg, $response, sub {
+ die "read timeout" if $timeout && !$sel->can_read($timeout);
+ my $n = sysread($socket, $buf, $size);
+ die $! unless defined($n);
+ return \$buf;
+ } );
+
+ # Convert menu to HTML and return data to user.
+ if ($gophertype eq '1' || $gophertype eq '7') {
+ my $content = menu2html($response->content);
+ if (defined $user_arg) {
+ $response = $self->collect_once($user_arg, $response, $content);
+ }
+ else {
+ $response->content($content);
+ }
+ }
+
+ $response;
+}
+
+
+sub gopher2url
+{
+ my($gophertype, $path, $host, $port) = @_;
+
+ my $url;
+
+ if ($gophertype eq '8' || $gophertype eq 'T') {
+ # telnet session
+ $url = $HTTP::URI_CLASS->new($gophertype eq '8' ? 'telnet:':'tn3270:');
+ $url->user($path) if defined $path;
+ }
+ else {
+ $path = URI::Escape::uri_escape($path);
+ $url = $HTTP::URI_CLASS->new("gopher:/$gophertype$path");
+ }
+ $url->host($host);
+ $url->port($port);
+ $url;
+}
+
+sub menu2html {
+ my($menu) = @_;
+
+ $menu =~ s/\015//g; # remove carriage return
+ my $tmp = <<"EOT";
+<HTML>
+<HEAD>
+ <TITLE>Gopher menu</TITLE>
+</HEAD>
+<BODY>
+<H1>Gopher menu</H1>
+EOT
+ for (split("\n", $menu)) {
+ last if /^\./;
+ my($pretty, $path, $host, $port) = split("\t");
+
+ $pretty =~ s/^(.)//;
+ my $type = $1;
+
+ my $url = gopher2url($type, $path, $host, $port)->as_string;
+ $tmp .= qq{<A HREF="$url">$pretty</A><BR>\n};
+ }
+ $tmp .= "</BODY>\n</HTML>\n";
+ $tmp;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::http;
+
+use strict;
+
+require HTTP::Response;
+require HTTP::Status;
+require Net::HTTP;
+
+use vars qw(@ISA @EXTRA_SOCK_OPTS);
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+my $CRLF = "\015\012";
+
+sub _new_socket
+{
+ my($self, $host, $port, $timeout) = @_;
+ my $conn_cache = $self->{ua}{conn_cache};
+ if ($conn_cache) {
+ if (my $sock = $conn_cache->withdraw($self->socket_type, "$host:$port")) {
+ return $sock if $sock && !$sock->can_read(0);
+ # if the socket is readable, then either the peer has closed the
+ # connection or there are some garbage bytes on it. In either
+ # case we abandon it.
+ $sock->close;
+ }
+ }
+
+ local($^W) = 0; # IO::Socket::INET can be noisy
+ my $sock = $self->socket_class->new(PeerAddr => $host,
+ PeerPort => $port,
+ LocalAddr => $self->{ua}{local_address},
+ Proto => 'tcp',
+ Timeout => $timeout,
+ KeepAlive => !!$conn_cache,
+ SendTE => 1,
+ $self->_extra_sock_opts($host, $port),
+ );
+
+ unless ($sock) {
+ # IO::Socket::INET leaves additional error messages in $@
+ $@ =~ s/^.*?: //;
+ die "Can't connect to $host:$port ($@)";
+ }
+
+ # perl 5.005's IO::Socket does not have the blocking method.
+ eval { $sock->blocking(0); };
+
+ $sock;
+}
+
+sub socket_type
+{
+ return "http";
+}
+
+sub socket_class
+{
+ my $self = shift;
+ (ref($self) || $self) . "::Socket";
+}
+
+sub _extra_sock_opts # to be overridden by subclass
+{
+ return @EXTRA_SOCK_OPTS;
+}
+
+sub _check_sock
+{
+ #my($self, $req, $sock) = @_;
+}
+
+sub _get_sock_info
+{
+ my($self, $res, $sock) = @_;
+ if (defined(my $peerhost = $sock->peerhost)) {
+ $res->header("Client-Peer" => "$peerhost:" . $sock->peerport);
+ }
+}
+
+sub _fixup_header
+{
+ my($self, $h, $url, $proxy) = @_;
+
+ # Extract 'Host' header
+ my $hhost = $url->authority;
+ if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@"
+ # add authorization header if we need them. HTTP URLs do
+ # not really support specification of user and password, but
+ # we allow it.
+ if (defined($1) && not $h->header('Authorization')) {
+ require URI::Escape;
+ $h->authorization_basic(map URI::Escape::uri_unescape($_),
+ split(":", $1, 2));
+ }
+ }
+ $h->init_header('Host' => $hhost);
+
+ if ($proxy) {
+ # Check the proxy URI's userinfo() for proxy credentials
+ # export http_proxy="http://proxyuser:proxypass@proxyhost:port"
+ my $p_auth = $proxy->userinfo();
+ if(defined $p_auth) {
+ require URI::Escape;
+ $h->proxy_authorization_basic(map URI::Escape::uri_unescape($_),
+ split(":", $p_auth, 2))
+ }
+ }
+}
+
+sub hlist_remove {
+ my($hlist, $k) = @_;
+ $k = lc $k;
+ for (my $i = @$hlist - 2; $i >= 0; $i -= 2) {
+ next unless lc($hlist->[$i]) eq $k;
+ splice(@$hlist, $i, 2);
+ }
+}
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ $size ||= 4096;
+
+ # check method
+ my $method = $request->method;
+ unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'http:' URLs");
+ }
+
+ my $url = $request->uri;
+ my($host, $port, $fullpath);
+
+ # Check if we're proxy'ing
+ if (defined $proxy) {
+ # $proxy is an URL to an HTTP server which will proxy this request
+ $host = $proxy->host;
+ $port = $proxy->port;
+ $fullpath = $method eq "CONNECT" ?
+ ($url->host . ":" . $url->port) :
+ $url->as_string;
+ }
+ else {
+ $host = $url->host;
+ $port = $url->port;
+ $fullpath = $url->path_query;
+ $fullpath = "/$fullpath" unless $fullpath =~ m,^/,;
+ }
+
+ # connect to remote site
+ my $socket = $self->_new_socket($host, $port, $timeout);
+ $self->_check_sock($request, $socket);
+
+ my @h;
+ my $request_headers = $request->headers->clone;
+ $self->_fixup_header($request_headers, $url, $proxy);
+
+ $request_headers->scan(sub {
+ my($k, $v) = @_;
+ $k =~ s/^://;
+ $v =~ s/\n/ /g;
+ push(@h, $k, $v);
+ });
+
+ my $content_ref = $request->content_ref;
+ $content_ref = $$content_ref if ref($$content_ref);
+ my $chunked;
+ my $has_content;
+
+ if (ref($content_ref) eq 'CODE') {
+ my $clen = $request_headers->header('Content-Length');
+ $has_content++ if $clen;
+ unless (defined $clen) {
+ push(@h, "Transfer-Encoding" => "chunked");
+ $has_content++;
+ $chunked++;
+ }
+ }
+ else {
+ # Set (or override) Content-Length header
+ my $clen = $request_headers->header('Content-Length');
+ if (defined($$content_ref) && length($$content_ref)) {
+ $has_content = length($$content_ref);
+ if (!defined($clen) || $clen ne $has_content) {
+ if (defined $clen) {
+ warn "Content-Length header value was wrong, fixed";
+ hlist_remove(\@h, 'Content-Length');
+ }
+ push(@h, 'Content-Length' => $has_content);
+ }
+ }
+ elsif ($clen) {
+ warn "Content-Length set when there is no content, fixed";
+ hlist_remove(\@h, 'Content-Length');
+ }
+ }
+
+ my $write_wait = 0;
+ $write_wait = 2
+ if ($request_headers->header("Expect") || "") =~ /100-continue/;
+
+ my $req_buf = $socket->format_request($method, $fullpath, @h);
+ #print "------\n$req_buf\n------\n";
+
+ if (!$has_content || $write_wait || $has_content > 8*1024) {
+ WRITE:
+ {
+ # Since this just writes out the header block it should almost
+ # always succeed to send the whole buffer in a single write call.
+ my $n = $socket->syswrite($req_buf, length($req_buf));
+ unless (defined $n) {
+ redo WRITE if $!{EINTR};
+ if ($!{EAGAIN}) {
+ select(undef, undef, undef, 0.1);
+ redo WRITE;
+ }
+ die "write failed: $!";
+ }
+ if ($n) {
+ substr($req_buf, 0, $n, "");
+ }
+ else {
+ select(undef, undef, undef, 0.5);
+ }
+ redo WRITE if length $req_buf;
+ }
+ }
+
+ my($code, $mess, @junk);
+ my $drop_connection;
+
+ if ($has_content) {
+ my $eof;
+ my $wbuf;
+ my $woffset = 0;
+ if (ref($content_ref) eq 'CODE') {
+ my $buf = &$content_ref();
+ $buf = "" unless defined($buf);
+ $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF
+ if $chunked;
+ substr($buf, 0, 0) = $req_buf if $req_buf;
+ $wbuf = \$buf;
+ }
+ else {
+ if ($req_buf) {
+ my $buf = $req_buf . $$content_ref;
+ $wbuf = \$buf;
+ }
+ else {
+ $wbuf = $content_ref;
+ }
+ $eof = 1;
+ }
+
+ my $fbits = '';
+ vec($fbits, fileno($socket), 1) = 1;
+
+ WRITE:
+ while ($woffset < length($$wbuf)) {
+
+ my $sel_timeout = $timeout;
+ if ($write_wait) {
+ $sel_timeout = $write_wait if $write_wait < $sel_timeout;
+ }
+ my $time_before;
+ $time_before = time if $sel_timeout;
+
+ my $rbits = $fbits;
+ my $wbits = $write_wait ? undef : $fbits;
+ my $sel_timeout_before = $sel_timeout;
+ SELECT:
+ {
+ my $nfound = select($rbits, $wbits, undef, $sel_timeout);
+ if ($nfound < 0) {
+ if ($!{EINTR} || $!{EAGAIN}) {
+ if ($time_before) {
+ $sel_timeout = $sel_timeout_before - (time - $time_before);
+ $sel_timeout = 0 if $sel_timeout < 0;
+ }
+ redo SELECT;
+ }
+ die "select failed: $!";
+ }
+ }
+
+ if ($write_wait) {
+ $write_wait -= time - $time_before;
+ $write_wait = 0 if $write_wait < 0;
+ }
+
+ if (defined($rbits) && $rbits =~ /[^\0]/) {
+ # readable
+ my $buf = $socket->_rbuf;
+ my $n = $socket->sysread($buf, 1024, length($buf));
+ unless (defined $n) {
+ die "read failed: $!" unless $!{EINTR} || $!{EAGAIN};
+ # if we get here the rest of the block will do nothing
+ # and we will retry the read on the next round
+ }
+ elsif ($n == 0) {
+ # the server closed the connection before we finished
+ # writing all the request content. No need to write any more.
+ $drop_connection++;
+ last WRITE;
+ }
+ $socket->_rbuf($buf);
+ if (!$code && $buf =~ /\015?\012\015?\012/) {
+ # a whole response header is present, so we can read it without blocking
+ ($code, $mess, @h) = $socket->read_response_headers(laxed => 1,
+ junk_out => \@junk,
+ );
+ if ($code eq "100") {
+ $write_wait = 0;
+ undef($code);
+ }
+ else {
+ $drop_connection++;
+ last WRITE;
+ # XXX should perhaps try to abort write in a nice way too
+ }
+ }
+ }
+ if (defined($wbits) && $wbits =~ /[^\0]/) {
+ my $n = $socket->syswrite($$wbuf, length($$wbuf), $woffset);
+ unless (defined $n) {
+ die "write failed: $!" unless $!{EINTR} || $!{EAGAIN};
+ $n = 0; # will retry write on the next round
+ }
+ elsif ($n == 0) {
+ die "write failed: no bytes written";
+ }
+ $woffset += $n;
+
+ if (!$eof && $woffset >= length($$wbuf)) {
+ # need to refill buffer from $content_ref code
+ my $buf = &$content_ref();
+ $buf = "" unless defined($buf);
+ $eof++ unless length($buf);
+ $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF
+ if $chunked;
+ $wbuf = \$buf;
+ $woffset = 0;
+ }
+ }
+ } # WRITE
+ }
+
+ ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk)
+ unless $code;
+ ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk)
+ if $code eq "100";
+
+ my $response = HTTP::Response->new($code, $mess);
+ my $peer_http_version = $socket->peer_http_version;
+ $response->protocol("HTTP/$peer_http_version");
+ {
+ local $HTTP::Headers::TRANSLATE_UNDERSCORE;
+ $response->push_header(@h);
+ }
+ $response->push_header("Client-Junk" => \@junk) if @junk;
+
+ $response->request($request);
+ $self->_get_sock_info($response, $socket);
+
+ if ($method eq "CONNECT") {
+ $response->{client_socket} = $socket; # so it can be picked up
+ return $response;
+ }
+
+ if (my @te = $response->remove_header('Transfer-Encoding')) {
+ $response->push_header('Client-Transfer-Encoding', \@te);
+ }
+ $response->push_header('Client-Response-Num', scalar $socket->increment_response_count);
+
+ my $complete;
+ $response = $self->collect($arg, $response, sub {
+ my $buf = ""; #prevent use of uninitialized value in SSLeay.xs
+ my $n;
+ READ:
+ {
+ $n = $socket->read_entity_body($buf, $size);
+ unless (defined $n) {
+ redo READ if $!{EINTR} || $!{EAGAIN};
+ die "read failed: $!";
+ }
+ redo READ if $n == -1;
+ }
+ $complete++ if !$n;
+ return \$buf;
+ } );
+ $drop_connection++ unless $complete;
+
+ @h = $socket->get_trailers;
+ if (@h) {
+ local $HTTP::Headers::TRANSLATE_UNDERSCORE;
+ $response->push_header(@h);
+ }
+
+ # keep-alive support
+ unless ($drop_connection) {
+ if (my $conn_cache = $self->{ua}{conn_cache}) {
+ my %connection = map { (lc($_) => 1) }
+ split(/\s*,\s*/, ($response->header("Connection") || ""));
+ if (($peer_http_version eq "1.1" && !$connection{close}) ||
+ $connection{"keep-alive"})
+ {
+ $conn_cache->deposit($self->socket_type, "$host:$port", $socket);
+ }
+ }
+ }
+
+ $response;
+}
+
+
+#-----------------------------------------------------------
+package LWP::Protocol::http::SocketMethods;
+
+sub sysread {
+ my $self = shift;
+ if (my $timeout = ${*$self}{io_socket_timeout}) {
+ die "read timeout" unless $self->can_read($timeout);
+ }
+ else {
+ # since we have made the socket non-blocking we
+ # use select to wait for some data to arrive
+ $self->can_read(undef) || die "Assert";
+ }
+ sysread($self, $_[0], $_[1], $_[2] || 0);
+}
+
+sub can_read {
+ my($self, $timeout) = @_;
+ my $fbits = '';
+ vec($fbits, fileno($self), 1) = 1;
+ SELECT:
+ {
+ my $before;
+ $before = time if $timeout;
+ my $nfound = select($fbits, undef, undef, $timeout);
+ if ($nfound < 0) {
+ if ($!{EINTR} || $!{EAGAIN}) {
+ # don't really think EAGAIN can happen here
+ if ($timeout) {
+ $timeout -= time - $before;
+ $timeout = 0 if $timeout < 0;
+ }
+ redo SELECT;
+ }
+ die "select failed: $!";
+ }
+ return $nfound > 0;
+ }
+}
+
+sub ping {
+ my $self = shift;
+ !$self->can_read(0);
+}
+
+sub increment_response_count {
+ my $self = shift;
+ return ++${*$self}{'myhttp_response_count'};
+}
+
+#-----------------------------------------------------------
+package LWP::Protocol::http::Socket;
+use vars qw(@ISA);
+@ISA = qw(LWP::Protocol::http::SocketMethods Net::HTTP);
+
+1;
--- /dev/null
+package LWP::Protocol::http10;
+
+use strict;
+
+require HTTP::Response;
+require HTTP::Status;
+require IO::Socket;
+require IO::Select;
+
+use vars qw(@ISA @EXTRA_SOCK_OPTS);
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+my $CRLF = "\015\012"; # how lines should be terminated;
+ # "\r\n" is not correct on all systems, for
+ # instance MacPerl defines it to "\012\015"
+
+sub _new_socket
+{
+ my($self, $host, $port, $timeout) = @_;
+
+ local($^W) = 0; # IO::Socket::INET can be noisy
+ my $sock = IO::Socket::INET->new(PeerAddr => $host,
+ PeerPort => $port,
+ Proto => 'tcp',
+ Timeout => $timeout,
+ $self->_extra_sock_opts($host, $port),
+ );
+ unless ($sock) {
+ # IO::Socket::INET leaves additional error messages in $@
+ $@ =~ s/^.*?: //;
+ die "Can't connect to $host:$port ($@)";
+ }
+ $sock;
+}
+
+sub _extra_sock_opts # to be overridden by subclass
+{
+ return @EXTRA_SOCK_OPTS;
+}
+
+
+sub _check_sock
+{
+ #my($self, $req, $sock) = @_;
+}
+
+sub _get_sock_info
+{
+ my($self, $res, $sock) = @_;
+ if (defined(my $peerhost = $sock->peerhost)) {
+ $res->header("Client-Peer" => "$peerhost:" . $sock->peerport);
+ }
+}
+
+sub _fixup_header
+{
+ my($self, $h, $url, $proxy) = @_;
+
+ $h->remove_header('Connection'); # need support here to be useful
+
+ # HTTP/1.1 will require us to send the 'Host' header, so we might
+ # as well start now.
+ my $hhost = $url->authority;
+ if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@"
+ # add authorization header if we need them. HTTP URLs do
+ # not really support specification of user and password, but
+ # we allow it.
+ if (defined($1) && not $h->header('Authorization')) {
+ require URI::Escape;
+ $h->authorization_basic(map URI::Escape::uri_unescape($_),
+ split(":", $1, 2));
+ }
+ }
+ $h->init_header('Host' => $hhost);
+
+ if ($proxy) {
+ # Check the proxy URI's userinfo() for proxy credentials
+ # export http_proxy="http://proxyuser:proxypass@proxyhost:port"
+ my $p_auth = $proxy->userinfo();
+ if(defined $p_auth) {
+ require URI::Escape;
+ $h->proxy_authorization_basic(map URI::Escape::uri_unescape($_),
+ split(":", $p_auth, 2))
+ }
+ }
+}
+
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ $size ||= 4096;
+
+ # check method
+ my $method = $request->method;
+ unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'http:' URLs");
+ }
+
+ my $url = $request->uri;
+ my($host, $port, $fullpath);
+
+ # Check if we're proxy'ing
+ if (defined $proxy) {
+ # $proxy is an URL to an HTTP server which will proxy this request
+ $host = $proxy->host;
+ $port = $proxy->port;
+ $fullpath = $method eq "CONNECT" ?
+ ($url->host . ":" . $url->port) :
+ $url->as_string;
+ }
+ else {
+ $host = $url->host;
+ $port = $url->port;
+ $fullpath = $url->path_query;
+ $fullpath = "/" unless length $fullpath;
+ }
+
+ # connect to remote site
+ my $socket = $self->_new_socket($host, $port, $timeout);
+ $self->_check_sock($request, $socket);
+
+ my $sel = IO::Select->new($socket) if $timeout;
+
+ my $request_line = "$method $fullpath HTTP/1.0$CRLF";
+
+ my $h = $request->headers->clone;
+ my $cont_ref = $request->content_ref;
+ $cont_ref = $$cont_ref if ref($$cont_ref);
+ my $ctype = ref($cont_ref);
+
+ # If we're sending content we *have* to specify a content length
+ # otherwise the server won't know a messagebody is coming.
+ if ($ctype eq 'CODE') {
+ die 'No Content-Length header for request with dynamic content'
+ unless defined($h->header('Content-Length')) ||
+ $h->content_type =~ /^multipart\//;
+ # For HTTP/1.1 we could have used chunked transfer encoding...
+ }
+ else {
+ $h->header('Content-Length' => length $$cont_ref)
+ if defined($$cont_ref) && length($$cont_ref);
+ }
+
+ $self->_fixup_header($h, $url, $proxy);
+
+ my $buf = $request_line . $h->as_string($CRLF) . $CRLF;
+ my $n; # used for return value from syswrite/sysread
+ my $length;
+ my $offset;
+
+ # syswrite $buf
+ $length = length($buf);
+ $offset = 0;
+ while ( $offset < $length ) {
+ die "write timeout" if $timeout && !$sel->can_write($timeout);
+ $n = $socket->syswrite($buf, $length-$offset, $offset );
+ die $! unless defined($n);
+ $offset += $n;
+ }
+
+ if ($ctype eq 'CODE') {
+ while ( ($buf = &$cont_ref()), defined($buf) && length($buf)) {
+ # syswrite $buf
+ $length = length($buf);
+ $offset = 0;
+ while ( $offset < $length ) {
+ die "write timeout" if $timeout && !$sel->can_write($timeout);
+ $n = $socket->syswrite($buf, $length-$offset, $offset );
+ die $! unless defined($n);
+ $offset += $n;
+ }
+ }
+ }
+ elsif (defined($$cont_ref) && length($$cont_ref)) {
+ # syswrite $$cont_ref
+ $length = length($$cont_ref);
+ $offset = 0;
+ while ( $offset < $length ) {
+ die "write timeout" if $timeout && !$sel->can_write($timeout);
+ $n = $socket->syswrite($$cont_ref, $length-$offset, $offset );
+ die $! unless defined($n);
+ $offset += $n;
+ }
+ }
+
+ # read response line from server
+ my $response;
+ $buf = '';
+
+ # Inside this loop we will read the response line and all headers
+ # found in the response.
+ while (1) {
+ die "read timeout" if $timeout && !$sel->can_read($timeout);
+ $n = $socket->sysread($buf, $size, length($buf));
+ die $! unless defined($n);
+ die "unexpected EOF before status line seen" unless $n;
+
+ if ($buf =~ s/^(HTTP\/\d+\.\d+)[ \t]+(\d+)[ \t]*([^\012]*)\012//) {
+ # HTTP/1.0 response or better
+ my($ver,$code,$msg) = ($1, $2, $3);
+ $msg =~ s/\015$//;
+ $response = HTTP::Response->new($code, $msg);
+ $response->protocol($ver);
+
+ # ensure that we have read all headers. The headers will be
+ # terminated by two blank lines
+ until ($buf =~ /^\015?\012/ || $buf =~ /\015?\012\015?\012/) {
+ # must read more if we can...
+ die "read timeout" if $timeout && !$sel->can_read($timeout);
+ my $old_len = length($buf);
+ $n = $socket->sysread($buf, $size, $old_len);
+ die $! unless defined($n);
+ die "unexpected EOF before all headers seen" unless $n;
+ }
+
+ # now we start parsing the headers. The strategy is to
+ # remove one line at a time from the beginning of the header
+ # buffer ($res).
+ my($key, $val);
+ while ($buf =~ s/([^\012]*)\012//) {
+ my $line = $1;
+
+ # if we need to restore as content when illegal headers
+ # are found.
+ my $save = "$line\012";
+
+ $line =~ s/\015$//;
+ last unless length $line;
+
+ if ($line =~ /^([a-zA-Z0-9_\-.]+)\s*:\s*(.*)/) {
+ $response->push_header($key, $val) if $key;
+ ($key, $val) = ($1, $2);
+ }
+ elsif ($line =~ /^\s+(.*)/ && $key) {
+ $val .= " $1";
+ }
+ else {
+ $response->push_header("Client-Bad-Header-Line" => $line);
+ }
+ }
+ $response->push_header($key, $val) if $key;
+ last;
+
+ }
+ elsif ((length($buf) >= 5 and $buf !~ /^HTTP\//) or
+ $buf =~ /\012/ ) {
+ # HTTP/0.9 or worse
+ $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
+ $response->protocol('HTTP/0.9');
+ last;
+
+ }
+ else {
+ # need more data
+ }
+ };
+ $response->request($request);
+ $self->_get_sock_info($response, $socket);
+
+ if ($method eq "CONNECT") {
+ $response->{client_socket} = $socket; # so it can be picked up
+ $response->content($buf); # in case we read more than the headers
+ return $response;
+ }
+
+ my $usebuf = length($buf) > 0;
+ $response = $self->collect($arg, $response, sub {
+ if ($usebuf) {
+ $usebuf = 0;
+ return \$buf;
+ }
+ die "read timeout" if $timeout && !$sel->can_read($timeout);
+ my $n = $socket->sysread($buf, $size);
+ die $! unless defined($n);
+ return \$buf;
+ } );
+
+ #$socket->close;
+
+ $response;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::https;
+
+use strict;
+
+use vars qw(@ISA);
+require LWP::Protocol::http;
+@ISA = qw(LWP::Protocol::http);
+
+sub socket_type
+{
+ return "https";
+}
+
+sub _check_sock
+{
+ my($self, $req, $sock) = @_;
+ my $check = $req->header("If-SSL-Cert-Subject");
+ if (defined $check) {
+ my $cert = $sock->get_peer_certificate ||
+ die "Missing SSL certificate";
+ my $subject = $cert->subject_name;
+ die "Bad SSL certificate subject: '$subject' !~ /$check/"
+ unless $subject =~ /$check/;
+ $req->remove_header("If-SSL-Cert-Subject"); # don't pass it on
+ }
+}
+
+sub _get_sock_info
+{
+ my $self = shift;
+ $self->SUPER::_get_sock_info(@_);
+ my($res, $sock) = @_;
+ $res->header("Client-SSL-Cipher" => $sock->get_cipher);
+ my $cert = $sock->get_peer_certificate;
+ if ($cert) {
+ $res->header("Client-SSL-Cert-Subject" => $cert->subject_name);
+ $res->header("Client-SSL-Cert-Issuer" => $cert->issuer_name);
+ }
+ if(! eval { $sock->get_peer_verify }) {
+ $res->header("Client-SSL-Warning" => "Peer certificate not verified");
+ }
+}
+
+#-----------------------------------------------------------
+package LWP::Protocol::https::Socket;
+
+use vars qw(@ISA);
+require Net::HTTPS;
+@ISA = qw(Net::HTTPS LWP::Protocol::http::SocketMethods);
+
+1;
--- /dev/null
+package LWP::Protocol::https10;
+
+use strict;
+
+# Figure out which SSL implementation to use
+use vars qw($SSL_CLASS);
+if ($Net::SSL::VERSION) {
+ $SSL_CLASS = "Net::SSL";
+}
+elsif ($IO::Socket::SSL::VERSION) {
+ $SSL_CLASS = "IO::Socket::SSL"; # it was already loaded
+}
+else {
+ eval { require Net::SSL; }; # from Crypt-SSLeay
+ if ($@) {
+ require IO::Socket::SSL;
+ $SSL_CLASS = "IO::Socket::SSL";
+ }
+ else {
+ $SSL_CLASS = "Net::SSL";
+ }
+}
+
+
+use vars qw(@ISA);
+
+require LWP::Protocol::http10;
+@ISA=qw(LWP::Protocol::http10);
+
+sub _new_socket
+{
+ my($self, $host, $port, $timeout) = @_;
+ local($^W) = 0; # IO::Socket::INET can be noisy
+ my $sock = $SSL_CLASS->new(PeerAddr => $host,
+ PeerPort => $port,
+ Proto => 'tcp',
+ Timeout => $timeout,
+ );
+ unless ($sock) {
+ # IO::Socket::INET leaves additional error messages in $@
+ $@ =~ s/^.*?: //;
+ die "Can't connect to $host:$port ($@)";
+ }
+ $sock;
+}
+
+sub _check_sock
+{
+ my($self, $req, $sock) = @_;
+ my $check = $req->header("If-SSL-Cert-Subject");
+ if (defined $check) {
+ my $cert = $sock->get_peer_certificate ||
+ die "Missing SSL certificate";
+ my $subject = $cert->subject_name;
+ die "Bad SSL certificate subject: '$subject' !~ /$check/"
+ unless $subject =~ /$check/;
+ $req->remove_header("If-SSL-Cert-Subject"); # don't pass it on
+ }
+}
+
+sub _get_sock_info
+{
+ my $self = shift;
+ $self->SUPER::_get_sock_info(@_);
+ my($res, $sock) = @_;
+ $res->header("Client-SSL-Cipher" => $sock->get_cipher);
+ my $cert = $sock->get_peer_certificate;
+ if ($cert) {
+ $res->header("Client-SSL-Cert-Subject" => $cert->subject_name);
+ $res->header("Client-SSL-Cert-Issuer" => $cert->issuer_name);
+ }
+ $res->header("Client-SSL-Warning" => "Peer certificate not verified");
+}
+
+1;
--- /dev/null
+package LWP::Protocol::loopback;
+
+use strict;
+use vars qw(@ISA);
+require HTTP::Response;
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+sub request {
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ my $response = HTTP::Response->new(200, "OK");
+ $response->content_type("message/http; msgtype=request");
+
+ $response->header("Via", "loopback/1.0 $proxy")
+ if $proxy;
+
+ $response->header("X-Arg", $arg);
+ $response->header("X-Read-Size", $size);
+ $response->header("X-Timeout", $timeout);
+
+ return $self->collect_once($arg, $response, $request->as_string);
+}
+
+1;
--- /dev/null
+package LWP::Protocol::mailto;
+
+# This module implements the mailto protocol. It is just a simple
+# frontend to the Unix sendmail program except on MacOS, where it uses
+# Mail::Internet.
+
+require LWP::Protocol;
+require HTTP::Request;
+require HTTP::Response;
+require HTTP::Status;
+
+use Carp;
+use strict;
+use vars qw(@ISA $SENDMAIL);
+
+@ISA = qw(LWP::Protocol);
+
+unless ($SENDMAIL = $ENV{SENDMAIL}) {
+ for my $sm (qw(/usr/sbin/sendmail
+ /usr/lib/sendmail
+ /usr/ucblib/sendmail
+ ))
+ {
+ if (-x $sm) {
+ $SENDMAIL = $sm;
+ last;
+ }
+ }
+ die "Can't find the 'sendmail' program" unless $SENDMAIL;
+}
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size) = @_;
+
+ my ($mail, $addr) if $^O eq "MacOS";
+ my @text = () if $^O eq "MacOS";
+
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy with mail');
+ }
+
+ # check method
+ my $method = $request->method;
+
+ if ($method ne 'POST') {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'mailto:' URLs");
+ }
+
+ # check url
+ my $url = $request->uri;
+
+ my $scheme = $url->scheme;
+ if ($scheme ne 'mailto') {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "LWP::Protocol::mailto::request called for '$scheme'");
+ }
+ if ($^O eq "MacOS") {
+ eval {
+ require Mail::Internet;
+ };
+ if($@) {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "You don't have MailTools installed");
+ }
+ unless ($ENV{SMTPHOSTS}) {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "You don't have SMTPHOSTS defined");
+ }
+ }
+ else {
+ unless (-x $SENDMAIL) {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "You don't have $SENDMAIL");
+ }
+ }
+ if ($^O eq "MacOS") {
+ $mail = Mail::Internet->new or
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Can't get a Mail::Internet object");
+ }
+ else {
+ open(SENDMAIL, "| $SENDMAIL -oi -t") or
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Can't run $SENDMAIL: $!");
+ }
+ if ($^O eq "MacOS") {
+ $addr = $url->encoded822addr;
+ }
+ else {
+ $request = $request->clone; # we modify a copy
+ my @h = $url->headers; # URL headers override those in the request
+ while (@h) {
+ my $k = shift @h;
+ my $v = shift @h;
+ next unless defined $v;
+ if (lc($k) eq "body") {
+ $request->content($v);
+ }
+ else {
+ $request->push_header($k => $v);
+ }
+ }
+ }
+ if ($^O eq "MacOS") {
+ $mail->add(To => $addr);
+ $mail->add(split(/[:\n]/,$request->headers_as_string));
+ }
+ else {
+ print SENDMAIL $request->headers_as_string;
+ print SENDMAIL "\n";
+ }
+ my $content = $request->content;
+ if (defined $content) {
+ my $contRef = ref($content) ? $content : \$content;
+ if (ref($contRef) eq 'SCALAR') {
+ if ($^O eq "MacOS") {
+ @text = split("\n",$$contRef);
+ foreach (@text) {
+ $_ .= "\n";
+ }
+ }
+ else {
+ print SENDMAIL $$contRef;
+ }
+
+ }
+ elsif (ref($contRef) eq 'CODE') {
+ # Callback provides data
+ my $d;
+ if ($^O eq "MacOS") {
+ my $stuff = "";
+ while (length($d = &$contRef)) {
+ $stuff .= $d;
+ }
+ @text = split("\n",$stuff);
+ foreach (@text) {
+ $_ .= "\n";
+ }
+ }
+ else {
+ print SENDMAIL $d;
+ }
+ }
+ }
+ if ($^O eq "MacOS") {
+ $mail->body(\@text);
+ unless ($mail->smtpsend) {
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Mail::Internet->smtpsend unable to send message to <$addr>");
+ }
+ }
+ else {
+ unless (close(SENDMAIL)) {
+ my $err = $! ? "$!" : "Exit status $?";
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "$SENDMAIL: $err");
+ }
+ }
+
+
+ my $response = HTTP::Response->new(&HTTP::Status::RC_ACCEPTED,
+ "Mail accepted");
+ $response->header('Content-Type', 'text/plain');
+ if ($^O eq "MacOS") {
+ $response->header('Server' => "Mail::Internet $Mail::Internet::VERSION");
+ $response->content("Message sent to <$addr>\n");
+ }
+ else {
+ $response->header('Server' => $SENDMAIL);
+ my $to = $request->header("To");
+ $response->content("Message sent to <$to>\n");
+ }
+
+ return $response;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::nntp;
+
+# Implementation of the Network News Transfer Protocol (RFC 977)
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+require HTTP::Response;
+require HTTP::Status;
+require Net::NNTP;
+
+use strict;
+
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ $size = 4096 unless $size;
+
+ # Check for proxy
+ if (defined $proxy) {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy through NNTP');
+ }
+
+ # Check that the scheme is as expected
+ my $url = $request->uri;
+ my $scheme = $url->scheme;
+ unless ($scheme eq 'news' || $scheme eq 'nntp') {
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "LWP::Protocol::nntp::request called for '$scheme'");
+ }
+
+ # check for a valid method
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'POST') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for '$scheme:' URLs");
+ }
+
+ # extract the identifier and check against posting to an article
+ my $groupart = $url->_group;
+ my $is_art = $groupart =~ /@/;
+
+ if ($is_art && $method eq 'POST') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "Can't post to an article <$groupart>");
+ }
+
+ my $nntp = Net::NNTP->new($url->host,
+ #Port => 18574,
+ Timeout => $timeout,
+ #Debug => 1,
+ );
+ die "Can't connect to nntp server" unless $nntp;
+
+ # Check the initial welcome message from the NNTP server
+ if ($nntp->status != 2) {
+ return HTTP::Response->new(&HTTP::Status::RC_SERVICE_UNAVAILABLE,
+ $nntp->message);
+ }
+ my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
+
+ my $mess = $nntp->message;
+
+ # Try to extract server name from greeting message.
+ # Don't know if this works well for a large class of servers, but
+ # this works for our server.
+ $mess =~ s/\s+ready\b.*//;
+ $mess =~ s/^\S+\s+//;
+ $response->header(Server => $mess);
+
+ # First we handle posting of articles
+ if ($method eq 'POST') {
+ $nntp->quit; $nntp = undef;
+ $response->code(&HTTP::Status::RC_NOT_IMPLEMENTED);
+ $response->message("POST not implemented yet");
+ return $response;
+ }
+
+ # The method must be "GET" or "HEAD" by now
+ if (!$is_art) {
+ if (!$nntp->group($groupart)) {
+ $response->code(&HTTP::Status::RC_NOT_FOUND);
+ $response->message($nntp->message);
+ }
+ $nntp->quit; $nntp = undef;
+ # HEAD: just check if the group exists
+ if ($method eq 'GET' && $response->is_success) {
+ $response->code(&HTTP::Status::RC_NOT_IMPLEMENTED);
+ $response->message("GET newsgroup not implemented yet");
+ }
+ return $response;
+ }
+
+ # Send command to server to retrieve an article (or just the headers)
+ my $get = $method eq 'HEAD' ? "head" : "article";
+ my $art = $nntp->$get("<$groupart>");
+ unless ($art) {
+ $nntp->quit; $nntp = undef;
+ $response->code(&HTTP::Status::RC_NOT_FOUND);
+ $response->message($nntp->message);
+ return $response;
+ }
+
+ # Parse headers
+ my($key, $val);
+ local $_;
+ while ($_ = shift @$art) {
+ if (/^\s+$/) {
+ last; # end of headers
+ }
+ elsif (/^(\S+):\s*(.*)/) {
+ $response->push_header($key, $val) if $key;
+ ($key, $val) = ($1, $2);
+ }
+ elsif (/^\s+(.*)/) {
+ next unless $key;
+ $val .= $1;
+ }
+ else {
+ unshift(@$art, $_);
+ last;
+ }
+ }
+ $response->push_header($key, $val) if $key;
+
+ # Ensure that there is a Content-Type header
+ $response->header("Content-Type", "text/plain")
+ unless $response->header("Content-Type");
+
+ # Collect the body
+ $response = $self->collect_once($arg, $response, join("", @$art))
+ if @$art;
+
+ # Say goodbye to the server
+ $nntp->quit;
+ $nntp = undef;
+
+ $response;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::nogo;
+# If you want to disable access to a particular scheme, use this
+# class and then call
+# LWP::Protocol::implementor(that_scheme, 'LWP::Protocol::nogo');
+# For then on, attempts to access URLs with that scheme will generate
+# a 500 error.
+
+use strict;
+use vars qw(@ISA);
+require HTTP::Response;
+require HTTP::Status;
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+sub request {
+ my($self, $request) = @_;
+ my $scheme = $request->uri->scheme;
+
+ return HTTP::Response->new(
+ &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Access to \'$scheme\' URIs has been disabled"
+ );
+}
+1;
--- /dev/null
+package LWP::RobotUA;
+
+require LWP::UserAgent;
+@ISA = qw(LWP::UserAgent);
+$VERSION = "5.835";
+
+require WWW::RobotRules;
+require HTTP::Request;
+require HTTP::Response;
+
+use Carp ();
+use HTTP::Status ();
+use HTTP::Date qw(time2str);
+use strict;
+
+
+#
+# Additional attributes in addition to those found in LWP::UserAgent:
+#
+# $self->{'delay'} Required delay between request to the same
+# server in minutes.
+#
+# $self->{'rules'} A WWW::RobotRules object
+#
+
+sub new
+{
+ my $class = shift;
+ my %cnf;
+ if (@_ < 4) {
+ # legacy args
+ @cnf{qw(agent from rules)} = @_;
+ }
+ else {
+ %cnf = @_;
+ }
+
+ Carp::croak('LWP::RobotUA agent required') unless $cnf{agent};
+ Carp::croak('LWP::RobotUA from address required')
+ unless $cnf{from} && $cnf{from} =~ m/\@/;
+
+ my $delay = delete $cnf{delay} || 1;
+ my $use_sleep = delete $cnf{use_sleep};
+ $use_sleep = 1 unless defined($use_sleep);
+ my $rules = delete $cnf{rules};
+
+ my $self = LWP::UserAgent->new(%cnf);
+ $self = bless $self, $class;
+
+ $self->{'delay'} = $delay; # minutes
+ $self->{'use_sleep'} = $use_sleep;
+
+ if ($rules) {
+ $rules->agent($cnf{agent});
+ $self->{'rules'} = $rules;
+ }
+ else {
+ $self->{'rules'} = WWW::RobotRules->new($cnf{agent});
+ }
+
+ $self;
+}
+
+
+sub delay { shift->_elem('delay', @_); }
+sub use_sleep { shift->_elem('use_sleep', @_); }
+
+
+sub agent
+{
+ my $self = shift;
+ my $old = $self->SUPER::agent(@_);
+ if (@_) {
+ # Changing our name means to start fresh
+ $self->{'rules'}->agent($self->{'agent'});
+ }
+ $old;
+}
+
+
+sub rules {
+ my $self = shift;
+ my $old = $self->_elem('rules', @_);
+ $self->{'rules'}->agent($self->{'agent'}) if @_;
+ $old;
+}
+
+
+sub no_visits
+{
+ my($self, $netloc) = @_;
+ $self->{'rules'}->no_visits($netloc) || 0;
+}
+
+*host_count = \&no_visits; # backwards compatibility with LWP-5.02
+
+
+sub host_wait
+{
+ my($self, $netloc) = @_;
+ return undef unless defined $netloc;
+ my $last = $self->{'rules'}->last_visit($netloc);
+ if ($last) {
+ my $wait = int($self->{'delay'} * 60 - (time - $last));
+ $wait = 0 if $wait < 0;
+ return $wait;
+ }
+ return 0;
+}
+
+
+sub simple_request
+{
+ my($self, $request, $arg, $size) = @_;
+
+ # Do we try to access a new server?
+ my $allowed = $self->{'rules'}->allowed($request->uri);
+
+ if ($allowed < 0) {
+ # Host is not visited before, or robots.txt expired; fetch "robots.txt"
+ my $robot_url = $request->uri->clone;
+ $robot_url->path("robots.txt");
+ $robot_url->query(undef);
+
+ # make access to robot.txt legal since this will be a recursive call
+ $self->{'rules'}->parse($robot_url, "");
+
+ my $robot_req = HTTP::Request->new('GET', $robot_url);
+ my $robot_res = $self->request($robot_req);
+ my $fresh_until = $robot_res->fresh_until;
+ if ($robot_res->is_success) {
+ my $c = $robot_res->content;
+ if ($robot_res->content_type =~ m,^text/, && $c =~ /^\s*Disallow\s*:/mi) {
+ $self->{'rules'}->parse($robot_url, $c, $fresh_until);
+ }
+ else {
+ $self->{'rules'}->parse($robot_url, "", $fresh_until);
+ }
+
+ }
+ else {
+ $self->{'rules'}->parse($robot_url, "", $fresh_until);
+ }
+
+ # recalculate allowed...
+ $allowed = $self->{'rules'}->allowed($request->uri);
+ }
+
+ # Check rules
+ unless ($allowed) {
+ my $res = HTTP::Response->new(
+ &HTTP::Status::RC_FORBIDDEN, 'Forbidden by robots.txt');
+ $res->request( $request ); # bind it to that request
+ return $res;
+ }
+
+ my $netloc = eval { local $SIG{__DIE__}; $request->uri->host_port; };
+ my $wait = $self->host_wait($netloc);
+
+ if ($wait) {
+ if ($self->{'use_sleep'}) {
+ sleep($wait)
+ }
+ else {
+ my $res = HTTP::Response->new(
+ &HTTP::Status::RC_SERVICE_UNAVAILABLE, 'Please, slow down');
+ $res->header('Retry-After', time2str(time + $wait));
+ $res->request( $request ); # bind it to that request
+ return $res;
+ }
+ }
+
+ # Perform the request
+ my $res = $self->SUPER::simple_request($request, $arg, $size);
+
+ $self->{'rules'}->visit($netloc);
+
+ $res;
+}
+
+
+sub as_string
+{
+ my $self = shift;
+ my @s;
+ push(@s, "Robot: $self->{'agent'} operated by $self->{'from'} [$self]");
+ push(@s, " Minimum delay: " . int($self->{'delay'}*60) . "s");
+ push(@s, " Will sleep if too early") if $self->{'use_sleep'};
+ push(@s, " Rules = $self->{'rules'}");
+ join("\n", @s, '');
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+LWP::RobotUA - a class for well-behaved Web robots
+
+=head1 SYNOPSIS
+
+ use LWP::RobotUA;
+ my $ua = LWP::RobotUA->new('my-robot/0.1', 'me@foo.com');
+ $ua->delay(10); # be very nice -- max one hit every ten minutes!
+ ...
+
+ # Then just use it just like a normal LWP::UserAgent:
+ my $response = $ua->get('http://whatever.int/...');
+ ...
+
+=head1 DESCRIPTION
+
+This class implements a user agent that is suitable for robot
+applications. Robots should be nice to the servers they visit. They
+should consult the F</robots.txt> file to ensure that they are welcomed
+and they should not make requests too frequently.
+
+But before you consider writing a robot, take a look at
+<URL:http://www.robotstxt.org/>.
+
+When you use a I<LWP::RobotUA> object as your user agent, then you do not
+really have to think about these things yourself; C<robots.txt> files
+are automatically consulted and obeyed, the server isn't queried
+too rapidly, and so on. Just send requests
+as you do when you are using a normal I<LWP::UserAgent>
+object (using C<< $ua->get(...) >>, C<< $ua->head(...) >>,
+C<< $ua->request(...) >>, etc.), and this
+special agent will make sure you are nice.
+
+=head1 METHODS
+
+The LWP::RobotUA is a sub-class of LWP::UserAgent and implements the
+same methods. In addition the following methods are provided:
+
+=over 4
+
+=item $ua = LWP::RobotUA->new( %options )
+
+=item $ua = LWP::RobotUA->new( $agent, $from )
+
+=item $ua = LWP::RobotUA->new( $agent, $from, $rules )
+
+The LWP::UserAgent options C<agent> and C<from> are mandatory. The
+options C<delay>, C<use_sleep> and C<rules> initialize attributes
+private to the RobotUA. If C<rules> are not provided, then
+C<WWW::RobotRules> is instantiated providing an internal database of
+F<robots.txt>.
+
+It is also possible to just pass the value of C<agent>, C<from> and
+optionally C<rules> as plain positional arguments.
+
+=item $ua->delay
+
+=item $ua->delay( $minutes )
+
+Get/set the minimum delay between requests to the same server, in
+I<minutes>. The default is 1 minute. Note that this number doesn't
+have to be an integer; for example, this sets the delay to 10 seconds:
+
+ $ua->delay(10/60);
+
+=item $ua->use_sleep
+
+=item $ua->use_sleep( $boolean )
+
+Get/set a value indicating whether the UA should sleep() if requests
+arrive too fast, defined as $ua->delay minutes not passed since
+last request to the given server. The default is TRUE. If this value is
+FALSE then an internal SERVICE_UNAVAILABLE response will be generated.
+It will have an Retry-After header that indicates when it is OK to
+send another request to this server.
+
+=item $ua->rules
+
+=item $ua->rules( $rules )
+
+Set/get which I<WWW::RobotRules> object to use.
+
+=item $ua->no_visits( $netloc )
+
+Returns the number of documents fetched from this server host. Yeah I
+know, this method should probably have been named num_visits() or
+something like that. :-(
+
+=item $ua->host_wait( $netloc )
+
+Returns the number of I<seconds> (from now) you must wait before you can
+make a new request to this host.
+
+=item $ua->as_string
+
+Returns a string that describes the state of the UA.
+Mainly useful for debugging.
+
+=back
+
+=head1 SEE ALSO
+
+L<LWP::UserAgent>, L<WWW::RobotRules>
+
+=head1 COPYRIGHT
+
+Copyright 1996-2004 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
--- /dev/null
+package LWP::Simple;
+
+use strict;
+use vars qw($ua %loop_check $FULL_LWP @EXPORT @EXPORT_OK $VERSION);
+
+require Exporter;
+
+@EXPORT = qw(get head getprint getstore mirror);
+@EXPORT_OK = qw($ua);
+
+# I really hate this. I was a bad idea to do it in the first place.
+# Wonder how to get rid of it??? (It even makes LWP::Simple 7% slower
+# for trivial tests)
+use HTTP::Status;
+push(@EXPORT, @HTTP::Status::EXPORT);
+
+$VERSION = "5.835";
+
+sub import
+{
+ my $pkg = shift;
+ my $callpkg = caller;
+ Exporter::export($pkg, $callpkg, @_);
+}
+
+use LWP::UserAgent ();
+use HTTP::Status ();
+use HTTP::Date ();
+$ua = LWP::UserAgent->new; # we create a global UserAgent object
+$ua->agent("LWP::Simple/$VERSION ");
+$ua->env_proxy;
+
+
+sub get ($)
+{
+ my $response = $ua->get(shift);
+ return $response->decoded_content if $response->is_success;
+ return undef;
+}
+
+
+sub head ($)
+{
+ my($url) = @_;
+ my $request = HTTP::Request->new(HEAD => $url);
+ my $response = $ua->request($request);
+
+ if ($response->is_success) {
+ return $response unless wantarray;
+ return (scalar $response->header('Content-Type'),
+ scalar $response->header('Content-Length'),
+ HTTP::Date::str2time($response->header('Last-Modified')),
+ HTTP::Date::str2time($response->header('Expires')),
+ scalar $response->header('Server'),
+ );
+ }
+ return;
+}
+
+
+sub getprint ($)
+{
+ my($url) = @_;
+ my $request = HTTP::Request->new(GET => $url);
+ local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR
+ my $callback = sub { print $_[0] };
+ if ($^O eq "MacOS") {
+ $callback = sub { $_[0] =~ s/\015?\012/\n/g; print $_[0] }
+ }
+ my $response = $ua->request($request, $callback);
+ unless ($response->is_success) {
+ print STDERR $response->status_line, " <URL:$url>\n";
+ }
+ $response->code;
+}
+
+
+sub getstore ($$)
+{
+ my($url, $file) = @_;
+ my $request = HTTP::Request->new(GET => $url);
+ my $response = $ua->request($request, $file);
+
+ $response->code;
+}
+
+
+sub mirror ($$)
+{
+ my($url, $file) = @_;
+ my $response = $ua->mirror($url, $file);
+ $response->code;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWP::Simple - simple procedural interface to LWP
+
+=head1 SYNOPSIS
+
+ perl -MLWP::Simple -e 'getprint "http://www.sn.no"'
+
+ use LWP::Simple;
+ $content = get("http://www.sn.no/");
+ die "Couldn't get it!" unless defined $content;
+
+ if (mirror("http://www.sn.no/", "foo") == RC_NOT_MODIFIED) {
+ ...
+ }
+
+ if (is_success(getprint("http://www.sn.no/"))) {
+ ...
+ }
+
+=head1 DESCRIPTION
+
+This module is meant for people who want a simplified view of the
+libwww-perl library. It should also be suitable for one-liners. If
+you need more control or access to the header fields in the requests
+sent and responses received, then you should use the full object-oriented
+interface provided by the C<LWP::UserAgent> module.
+
+The following functions are provided (and exported) by this module:
+
+=over 3
+
+=item get($url)
+
+The get() function will fetch the document identified by the given URL
+and return it. It returns C<undef> if it fails. The $url argument can
+be either a string or a reference to a URI object.
+
+You will not be able to examine the response code or response headers
+(like 'Content-Type') when you are accessing the web using this
+function. If you need that information you should use the full OO
+interface (see L<LWP::UserAgent>).
+
+=item head($url)
+
+Get document headers. Returns the following 5 values if successful:
+($content_type, $document_length, $modified_time, $expires, $server)
+
+Returns an empty list if it fails. In scalar context returns TRUE if
+successful.
+
+=item getprint($url)
+
+Get and print a document identified by a URL. The document is printed
+to the selected default filehandle for output (normally STDOUT) as
+data is received from the network. If the request fails, then the
+status code and message are printed on STDERR. The return value is
+the HTTP response code.
+
+=item getstore($url, $file)
+
+Gets a document identified by a URL and stores it in the file. The
+return value is the HTTP response code.
+
+=item mirror($url, $file)
+
+Get and store a document identified by a URL, using
+I<If-modified-since>, and checking the I<Content-Length>. Returns
+the HTTP response code.
+
+=back
+
+This module also exports the HTTP::Status constants and procedures.
+You can use them when you check the response code from getprint(),
+getstore() or mirror(). The constants are:
+
+ RC_CONTINUE
+ RC_SWITCHING_PROTOCOLS
+ RC_OK
+ RC_CREATED
+ RC_ACCEPTED
+ RC_NON_AUTHORITATIVE_INFORMATION
+ RC_NO_CONTENT
+ RC_RESET_CONTENT
+ RC_PARTIAL_CONTENT
+ RC_MULTIPLE_CHOICES
+ RC_MOVED_PERMANENTLY
+ RC_MOVED_TEMPORARILY
+ RC_SEE_OTHER
+ RC_NOT_MODIFIED
+ RC_USE_PROXY
+ RC_BAD_REQUEST
+ RC_UNAUTHORIZED
+ RC_PAYMENT_REQUIRED
+ RC_FORBIDDEN
+ RC_NOT_FOUND
+ RC_METHOD_NOT_ALLOWED
+ RC_NOT_ACCEPTABLE
+ RC_PROXY_AUTHENTICATION_REQUIRED
+ RC_REQUEST_TIMEOUT
+ RC_CONFLICT
+ RC_GONE
+ RC_LENGTH_REQUIRED
+ RC_PRECONDITION_FAILED
+ RC_REQUEST_ENTITY_TOO_LARGE
+ RC_REQUEST_URI_TOO_LARGE
+ RC_UNSUPPORTED_MEDIA_TYPE
+ RC_INTERNAL_SERVER_ERROR
+ RC_NOT_IMPLEMENTED
+ RC_BAD_GATEWAY
+ RC_SERVICE_UNAVAILABLE
+ RC_GATEWAY_TIMEOUT
+ RC_HTTP_VERSION_NOT_SUPPORTED
+
+The HTTP::Status classification functions are:
+
+=over 3
+
+=item is_success($rc)
+
+True if response code indicated a successful request.
+
+=item is_error($rc)
+
+True if response code indicated that an error occurred.
+
+=back
+
+The module will also export the LWP::UserAgent object as C<$ua> if you
+ask for it explicitly.
+
+The user agent created by this module will identify itself as
+"LWP::Simple/#.##"
+and will initialize its proxy defaults from the environment (by
+calling $ua->env_proxy).
+
+=head1 CAVEAT
+
+Note that if you are using both LWP::Simple and the very popular CGI.pm
+module, you may be importing a C<head> function from each module,
+producing a warning like "Prototype mismatch: sub main::head ($) vs
+none". Get around this problem by just not importing LWP::Simple's
+C<head> function, like so:
+
+ use LWP::Simple qw(!head);
+ use CGI qw(:standard); # then only CGI.pm defines a head()
+
+Then if you do need LWP::Simple's C<head> function, you can just call
+it as C<LWP::Simple::head($url)>.
+
+=head1 SEE ALSO
+
+L<LWP>, L<lwpcook>, L<LWP::UserAgent>, L<HTTP::Status>, L<lwp-request>,
+L<lwp-mirror>
--- /dev/null
+package LWP::UserAgent;
+
+use strict;
+use vars qw(@ISA $VERSION);
+
+require LWP::MemberMixin;
+@ISA = qw(LWP::MemberMixin);
+$VERSION = "5.835";
+
+use HTTP::Request ();
+use HTTP::Response ();
+use HTTP::Date ();
+
+use LWP ();
+use LWP::Protocol ();
+
+use Carp ();
+
+if ($ENV{PERL_LWP_USE_HTTP_10}) {
+ require LWP::Protocol::http10;
+ LWP::Protocol::implementor('http', 'LWP::Protocol::http10');
+ eval {
+ require LWP::Protocol::https10;
+ LWP::Protocol::implementor('https', 'LWP::Protocol::https10');
+ };
+}
+
+
+
+sub new
+{
+ # Check for common user mistake
+ Carp::croak("Options to LWP::UserAgent should be key/value pairs, not hash reference")
+ if ref($_[1]) eq 'HASH';
+
+ my($class, %cnf) = @_;
+
+ my $agent = delete $cnf{agent};
+ my $from = delete $cnf{from};
+ my $def_headers = delete $cnf{default_headers};
+ my $timeout = delete $cnf{timeout};
+ $timeout = 3*60 unless defined $timeout;
+ my $local_address = delete $cnf{local_address};
+ my $use_eval = delete $cnf{use_eval};
+ $use_eval = 1 unless defined $use_eval;
+ my $parse_head = delete $cnf{parse_head};
+ $parse_head = 1 unless defined $parse_head;
+ my $show_progress = delete $cnf{show_progress};
+ my $max_size = delete $cnf{max_size};
+ my $max_redirect = delete $cnf{max_redirect};
+ $max_redirect = 7 unless defined $max_redirect;
+ my $env_proxy = delete $cnf{env_proxy};
+
+ my $cookie_jar = delete $cnf{cookie_jar};
+ my $conn_cache = delete $cnf{conn_cache};
+ my $keep_alive = delete $cnf{keep_alive};
+
+ Carp::croak("Can't mix conn_cache and keep_alive")
+ if $conn_cache && $keep_alive;
+
+
+ my $protocols_allowed = delete $cnf{protocols_allowed};
+ my $protocols_forbidden = delete $cnf{protocols_forbidden};
+
+ my $requests_redirectable = delete $cnf{requests_redirectable};
+ $requests_redirectable = ['GET', 'HEAD']
+ unless defined $requests_redirectable;
+
+ # Actually ""s are just as good as 0's, but for concision we'll just say:
+ Carp::croak("protocols_allowed has to be an arrayref or 0, not \"$protocols_allowed\"!")
+ if $protocols_allowed and ref($protocols_allowed) ne 'ARRAY';
+ Carp::croak("protocols_forbidden has to be an arrayref or 0, not \"$protocols_forbidden\"!")
+ if $protocols_forbidden and ref($protocols_forbidden) ne 'ARRAY';
+ Carp::croak("requests_redirectable has to be an arrayref or 0, not \"$requests_redirectable\"!")
+ if $requests_redirectable and ref($requests_redirectable) ne 'ARRAY';
+
+
+ if (%cnf && $^W) {
+ Carp::carp("Unrecognized LWP::UserAgent options: @{[sort keys %cnf]}");
+ }
+
+ my $self = bless {
+ def_headers => $def_headers,
+ timeout => $timeout,
+ local_address => $local_address,
+ use_eval => $use_eval,
+ show_progress=> $show_progress,
+ max_size => $max_size,
+ max_redirect => $max_redirect,
+ proxy => {},
+ no_proxy => [],
+ protocols_allowed => $protocols_allowed,
+ protocols_forbidden => $protocols_forbidden,
+ requests_redirectable => $requests_redirectable,
+ }, $class;
+
+ $self->agent(defined($agent) ? $agent : $class->_agent)
+ if defined($agent) || !$def_headers || !$def_headers->header("User-Agent");
+ $self->from($from) if $from;
+ $self->cookie_jar($cookie_jar) if $cookie_jar;
+ $self->parse_head($parse_head);
+ $self->env_proxy if $env_proxy;
+
+ $self->protocols_allowed( $protocols_allowed ) if $protocols_allowed;
+ $self->protocols_forbidden($protocols_forbidden) if $protocols_forbidden;
+
+ if ($keep_alive) {
+ $conn_cache ||= { total_capacity => $keep_alive };
+ }
+ $self->conn_cache($conn_cache) if $conn_cache;
+
+ return $self;
+}
+
+
+sub send_request
+{
+ my($self, $request, $arg, $size) = @_;
+ my($method, $url) = ($request->method, $request->uri);
+ my $scheme = $url->scheme;
+
+ local($SIG{__DIE__}); # protect against user defined die handlers
+
+ $self->progress("begin", $request);
+
+ my $response = $self->run_handlers("request_send", $request);
+
+ unless ($response) {
+ my $protocol;
+
+ {
+ # Honor object-specific restrictions by forcing protocol objects
+ # into class LWP::Protocol::nogo.
+ my $x;
+ if($x = $self->protocols_allowed) {
+ if (grep lc($_) eq $scheme, @$x) {
+ }
+ else {
+ require LWP::Protocol::nogo;
+ $protocol = LWP::Protocol::nogo->new;
+ }
+ }
+ elsif ($x = $self->protocols_forbidden) {
+ if(grep lc($_) eq $scheme, @$x) {
+ require LWP::Protocol::nogo;
+ $protocol = LWP::Protocol::nogo->new;
+ }
+ }
+ # else fall thru and create the protocol object normally
+ }
+
+ # Locate protocol to use
+ my $proxy = $request->{proxy};
+ if ($proxy) {
+ $scheme = $proxy->scheme;
+ }
+
+ unless ($protocol) {
+ $protocol = eval { LWP::Protocol::create($scheme, $self) };
+ if ($@) {
+ $@ =~ s/ at .* line \d+.*//s; # remove file/line number
+ $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
+ if ($scheme eq "https") {
+ $response->message($response->message . " (Crypt::SSLeay or IO::Socket::SSL not installed)");
+ $response->content_type("text/plain");
+ $response->content(<<EOT);
+LWP will support https URLs if either Crypt::SSLeay or IO::Socket::SSL
+is installed. More information at
+<http://search.cpan.org/dist/libwww-perl/README.SSL>.
+EOT
+ }
+ }
+ }
+
+ if (!$response && $self->{use_eval}) {
+ # we eval, and turn dies into responses below
+ eval {
+ $response = $protocol->request($request, $proxy,
+ $arg, $size, $self->{timeout});
+ };
+ if ($@) {
+ $@ =~ s/ at .* line \d+.*//s; # remove file/line number
+ $response = _new_response($request,
+ &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ $@);
+ }
+ }
+ elsif (!$response) {
+ $response = $protocol->request($request, $proxy,
+ $arg, $size, $self->{timeout});
+ # XXX: Should we die unless $response->is_success ???
+ }
+ }
+
+ $response->request($request); # record request for reference
+ $response->header("Client-Date" => HTTP::Date::time2str(time));
+
+ $self->run_handlers("response_done", $response);
+
+ $self->progress("end", $response);
+ return $response;
+}
+
+
+sub prepare_request
+{
+ my($self, $request) = @_;
+ die "Method missing" unless $request->method;
+ my $url = $request->uri;
+ die "URL missing" unless $url;
+ die "URL must be absolute" unless $url->scheme;
+
+ $self->run_handlers("request_preprepare", $request);
+
+ if (my $def_headers = $self->{def_headers}) {
+ for my $h ($def_headers->header_field_names) {
+ $request->init_header($h => [$def_headers->header($h)]);
+ }
+ }
+
+ $self->run_handlers("request_prepare", $request);
+
+ return $request;
+}
+
+
+sub simple_request
+{
+ my($self, $request, $arg, $size) = @_;
+
+ # sanity check the request passed in
+ if (defined $request) {
+ if (ref $request) {
+ Carp::croak("You need a request object, not a " . ref($request) . " object")
+ if ref($request) eq 'ARRAY' or ref($request) eq 'HASH' or
+ !$request->can('method') or !$request->can('uri');
+ }
+ else {
+ Carp::croak("You need a request object, not '$request'");
+ }
+ }
+ else {
+ Carp::croak("No request object passed in");
+ }
+
+ eval {
+ $request = $self->prepare_request($request);
+ };
+ if ($@) {
+ $@ =~ s/ at .* line \d+.*//s; # remove file/line number
+ return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, $@);
+ }
+ return $self->send_request($request, $arg, $size);
+}
+
+
+sub request
+{
+ my($self, $request, $arg, $size, $previous) = @_;
+
+ my $response = $self->simple_request($request, $arg, $size);
+ $response->previous($previous) if $previous;
+
+ if ($response->redirects >= $self->{max_redirect}) {
+ $response->header("Client-Warning" =>
+ "Redirect loop detected (max_redirect = $self->{max_redirect})");
+ return $response;
+ }
+
+ if (my $req = $self->run_handlers("response_redirect", $response)) {
+ return $self->request($req, $arg, $size, $response);
+ }
+
+ my $code = $response->code;
+
+ if ($code == &HTTP::Status::RC_MOVED_PERMANENTLY or
+ $code == &HTTP::Status::RC_FOUND or
+ $code == &HTTP::Status::RC_SEE_OTHER or
+ $code == &HTTP::Status::RC_TEMPORARY_REDIRECT)
+ {
+ my $referral = $request->clone;
+
+ # These headers should never be forwarded
+ $referral->remove_header('Host', 'Cookie');
+
+ if ($referral->header('Referer') &&
+ $request->uri->scheme eq 'https' &&
+ $referral->uri->scheme eq 'http')
+ {
+ # RFC 2616, section 15.1.3.
+ # https -> http redirect, suppressing Referer
+ $referral->remove_header('Referer');
+ }
+
+ if ($code == &HTTP::Status::RC_SEE_OTHER ||
+ $code == &HTTP::Status::RC_FOUND)
+ {
+ my $method = uc($referral->method);
+ unless ($method eq "GET" || $method eq "HEAD") {
+ $referral->method("GET");
+ $referral->content("");
+ $referral->remove_content_headers;
+ }
+ }
+
+ # And then we update the URL based on the Location:-header.
+ my $referral_uri = $response->header('Location');
+ {
+ # Some servers erroneously return a relative URL for redirects,
+ # so make it absolute if it not already is.
+ local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1;
+ my $base = $response->base;
+ $referral_uri = "" unless defined $referral_uri;
+ $referral_uri = $HTTP::URI_CLASS->new($referral_uri, $base)
+ ->abs($base);
+ }
+ $referral->uri($referral_uri);
+
+ return $response unless $self->redirect_ok($referral, $response);
+ return $self->request($referral, $arg, $size, $response);
+
+ }
+ elsif ($code == &HTTP::Status::RC_UNAUTHORIZED ||
+ $code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED
+ )
+ {
+ my $proxy = ($code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED);
+ my $ch_header = $proxy ? "Proxy-Authenticate" : "WWW-Authenticate";
+ my @challenge = $response->header($ch_header);
+ unless (@challenge) {
+ $response->header("Client-Warning" =>
+ "Missing Authenticate header");
+ return $response;
+ }
+
+ require HTTP::Headers::Util;
+ CHALLENGE: for my $challenge (@challenge) {
+ $challenge =~ tr/,/;/; # "," is used to separate auth-params!!
+ ($challenge) = HTTP::Headers::Util::split_header_words($challenge);
+ my $scheme = shift(@$challenge);
+ shift(@$challenge); # no value
+ $challenge = { @$challenge }; # make rest into a hash
+
+ unless ($scheme =~ /^([a-z]+(?:-[a-z]+)*)$/) {
+ $response->header("Client-Warning" =>
+ "Bad authentication scheme '$scheme'");
+ return $response;
+ }
+ $scheme = $1; # untainted now
+ my $class = "LWP::Authen::\u$scheme";
+ $class =~ s/-/_/g;
+
+ no strict 'refs';
+ unless (%{"$class\::"}) {
+ # try to load it
+ eval "require $class";
+ if ($@) {
+ if ($@ =~ /^Can\'t locate/) {
+ $response->header("Client-Warning" =>
+ "Unsupported authentication scheme '$scheme'");
+ }
+ else {
+ $response->header("Client-Warning" => $@);
+ }
+ next CHALLENGE;
+ }
+ }
+ unless ($class->can("authenticate")) {
+ $response->header("Client-Warning" =>
+ "Unsupported authentication scheme '$scheme'");
+ next CHALLENGE;
+ }
+ return $class->authenticate($self, $proxy, $challenge, $response,
+ $request, $arg, $size);
+ }
+ return $response;
+ }
+ return $response;
+}
+
+
+#
+# Now the shortcuts...
+#
+sub get {
+ require HTTP::Request::Common;
+ my($self, @parameters) = @_;
+ my @suff = $self->_process_colonic_headers(\@parameters,1);
+ return $self->request( HTTP::Request::Common::GET( @parameters ), @suff );
+}
+
+
+sub post {
+ require HTTP::Request::Common;
+ my($self, @parameters) = @_;
+ my @suff = $self->_process_colonic_headers(\@parameters, (ref($parameters[1]) ? 2 : 1));
+ return $self->request( HTTP::Request::Common::POST( @parameters ), @suff );
+}
+
+
+sub head {
+ require HTTP::Request::Common;
+ my($self, @parameters) = @_;
+ my @suff = $self->_process_colonic_headers(\@parameters,1);
+ return $self->request( HTTP::Request::Common::HEAD( @parameters ), @suff );
+}
+
+
+sub _process_colonic_headers {
+ # Process :content_cb / :content_file / :read_size_hint headers.
+ my($self, $args, $start_index) = @_;
+
+ my($arg, $size);
+ for(my $i = $start_index; $i < @$args; $i += 2) {
+ next unless defined $args->[$i];
+
+ #printf "Considering %s => %s\n", $args->[$i], $args->[$i + 1];
+
+ if($args->[$i] eq ':content_cb') {
+ # Some sanity-checking...
+ $arg = $args->[$i + 1];
+ Carp::croak("A :content_cb value can't be undef") unless defined $arg;
+ Carp::croak("A :content_cb value must be a coderef")
+ unless ref $arg and UNIVERSAL::isa($arg, 'CODE');
+
+ }
+ elsif ($args->[$i] eq ':content_file') {
+ $arg = $args->[$i + 1];
+
+ # Some sanity-checking...
+ Carp::croak("A :content_file value can't be undef")
+ unless defined $arg;
+ Carp::croak("A :content_file value can't be a reference")
+ if ref $arg;
+ Carp::croak("A :content_file value can't be \"\"")
+ unless length $arg;
+
+ }
+ elsif ($args->[$i] eq ':read_size_hint') {
+ $size = $args->[$i + 1];
+ # Bother checking it?
+
+ }
+ else {
+ next;
+ }
+ splice @$args, $i, 2;
+ $i -= 2;
+ }
+
+ # And return a suitable suffix-list for request(REQ,...)
+
+ return unless defined $arg;
+ return $arg, $size if defined $size;
+ return $arg;
+}
+
+my @ANI = qw(- \ | /);
+
+sub progress {
+ my($self, $status, $m) = @_;
+ return unless $self->{show_progress};
+
+ local($,, $\);
+ if ($status eq "begin") {
+ print STDERR "** ", $m->method, " ", $m->uri, " ==> ";
+ $self->{progress_start} = time;
+ $self->{progress_lastp} = "";
+ $self->{progress_ani} = 0;
+ }
+ elsif ($status eq "end") {
+ delete $self->{progress_lastp};
+ delete $self->{progress_ani};
+ print STDERR $m->status_line;
+ my $t = time - delete $self->{progress_start};
+ print STDERR " (${t}s)" if $t;
+ print STDERR "\n";
+ }
+ elsif ($status eq "tick") {
+ print STDERR "$ANI[$self->{progress_ani}++]\b";
+ $self->{progress_ani} %= @ANI;
+ }
+ else {
+ my $p = sprintf "%3.0f%%", $status * 100;
+ return if $p eq $self->{progress_lastp};
+ print STDERR "$p\b\b\b\b";
+ $self->{progress_lastp} = $p;
+ }
+ STDERR->flush;
+}
+
+
+#
+# This whole allow/forbid thing is based on man 1 at's way of doing things.
+#
+sub is_protocol_supported
+{
+ my($self, $scheme) = @_;
+ if (ref $scheme) {
+ # assume we got a reference to an URI object
+ $scheme = $scheme->scheme;
+ }
+ else {
+ Carp::croak("Illegal scheme '$scheme' passed to is_protocol_supported")
+ if $scheme =~ /\W/;
+ $scheme = lc $scheme;
+ }
+
+ my $x;
+ if(ref($self) and $x = $self->protocols_allowed) {
+ return 0 unless grep lc($_) eq $scheme, @$x;
+ }
+ elsif (ref($self) and $x = $self->protocols_forbidden) {
+ return 0 if grep lc($_) eq $scheme, @$x;
+ }
+
+ local($SIG{__DIE__}); # protect against user defined die handlers
+ $x = LWP::Protocol::implementor($scheme);
+ return 1 if $x and $x ne 'LWP::Protocol::nogo';
+ return 0;
+}
+
+
+sub protocols_allowed { shift->_elem('protocols_allowed' , @_) }
+sub protocols_forbidden { shift->_elem('protocols_forbidden' , @_) }
+sub requests_redirectable { shift->_elem('requests_redirectable', @_) }
+
+
+sub redirect_ok
+{
+ # RFC 2616, section 10.3.2 and 10.3.3 say:
+ # If the 30[12] status code is received in response to a request other
+ # than GET or HEAD, the user agent MUST NOT automatically redirect the
+ # request unless it can be confirmed by the user, since this might
+ # change the conditions under which the request was issued.
+
+ # Note that this routine used to be just:
+ # return 0 if $_[1]->method eq "POST"; return 1;
+
+ my($self, $new_request, $response) = @_;
+ my $method = $response->request->method;
+ return 0 unless grep $_ eq $method,
+ @{ $self->requests_redirectable || [] };
+
+ if ($new_request->uri->scheme eq 'file') {
+ $response->header("Client-Warning" =>
+ "Can't redirect to a file:// URL!");
+ return 0;
+ }
+
+ # Otherwise it's apparently okay...
+ return 1;
+}
+
+
+sub credentials
+{
+ my $self = shift;
+ my $netloc = lc(shift);
+ my $realm = shift || "";
+ my $old = $self->{basic_authentication}{$netloc}{$realm};
+ if (@_) {
+ $self->{basic_authentication}{$netloc}{$realm} = [@_];
+ }
+ return unless $old;
+ return @$old if wantarray;
+ return join(":", @$old);
+}
+
+
+sub get_basic_credentials
+{
+ my($self, $realm, $uri, $proxy) = @_;
+ return if $proxy;
+ return $self->credentials($uri->host_port, $realm);
+}
+
+
+sub timeout { shift->_elem('timeout', @_); }
+sub local_address{ shift->_elem('local_address',@_); }
+sub max_size { shift->_elem('max_size', @_); }
+sub max_redirect { shift->_elem('max_redirect', @_); }
+sub show_progress{ shift->_elem('show_progress', @_); }
+
+sub parse_head {
+ my $self = shift;
+ if (@_) {
+ my $flag = shift;
+ my $parser;
+ my $old = $self->set_my_handler("response_header", $flag ? sub {
+ my($response, $ua) = @_;
+ require HTML::HeadParser;
+ $parser = HTML::HeadParser->new;
+ $parser->xml_mode(1) if $response->content_is_xhtml;
+ $parser->utf8_mode(1) if $] >= 5.008 && $HTML::Parser::VERSION >= 3.40;
+
+ push(@{$response->{handlers}{response_data}}, {
+ callback => sub {
+ return unless $parser;
+ unless ($parser->parse($_[3])) {
+ my $h = $parser->header;
+ my $r = $_[0];
+ for my $f ($h->header_field_names) {
+ $r->init_header($f, [$h->header($f)]);
+ }
+ undef($parser);
+ }
+ },
+ });
+
+ } : undef,
+ m_media_type => "html",
+ );
+ return !!$old;
+ }
+ else {
+ return !!$self->get_my_handler("response_header");
+ }
+}
+
+sub cookie_jar {
+ my $self = shift;
+ my $old = $self->{cookie_jar};
+ if (@_) {
+ my $jar = shift;
+ if (ref($jar) eq "HASH") {
+ require HTTP::Cookies;
+ $jar = HTTP::Cookies->new(%$jar);
+ }
+ $self->{cookie_jar} = $jar;
+ $self->set_my_handler("request_prepare",
+ $jar ? sub { $jar->add_cookie_header($_[0]); } : undef,
+ );
+ $self->set_my_handler("response_done",
+ $jar ? sub { $jar->extract_cookies($_[0]); } : undef,
+ );
+ }
+ $old;
+}
+
+sub default_headers {
+ my $self = shift;
+ my $old = $self->{def_headers} ||= HTTP::Headers->new;
+ if (@_) {
+ Carp::croak("default_headers not set to HTTP::Headers compatible object")
+ unless @_ == 1 && $_[0]->can("header_field_names");
+ $self->{def_headers} = shift;
+ }
+ return $old;
+}
+
+sub default_header {
+ my $self = shift;
+ return $self->default_headers->header(@_);
+}
+
+sub _agent { "libwww-perl/$LWP::VERSION" }
+
+sub agent {
+ my $self = shift;
+ if (@_) {
+ my $agent = shift;
+ if ($agent) {
+ $agent .= $self->_agent if $agent =~ /\s+$/;
+ }
+ else {
+ undef($agent)
+ }
+ return $self->default_header("User-Agent", $agent);
+ }
+ return $self->default_header("User-Agent");
+}
+
+sub from { # legacy
+ my $self = shift;
+ return $self->default_header("From", @_);
+}
+
+
+sub conn_cache {
+ my $self = shift;
+ my $old = $self->{conn_cache};
+ if (@_) {
+ my $cache = shift;
+ if (ref($cache) eq "HASH") {
+ require LWP::ConnCache;
+ $cache = LWP::ConnCache->new(%$cache);
+ }
+ $self->{conn_cache} = $cache;
+ }
+ $old;
+}
+
+
+sub add_handler {
+ my($self, $phase, $cb, %spec) = @_;
+ $spec{line} ||= join(":", (caller)[1,2]);
+ my $conf = $self->{handlers}{$phase} ||= do {
+ require HTTP::Config;
+ HTTP::Config->new;
+ };
+ $conf->add(%spec, callback => $cb);
+}
+
+sub set_my_handler {
+ my($self, $phase, $cb, %spec) = @_;
+ $spec{owner} = (caller(1))[3] unless exists $spec{owner};
+ $self->remove_handler($phase, %spec);
+ $spec{line} ||= join(":", (caller)[1,2]);
+ $self->add_handler($phase, $cb, %spec) if $cb;
+}
+
+sub get_my_handler {
+ my $self = shift;
+ my $phase = shift;
+ my $init = pop if @_ % 2;
+ my %spec = @_;
+ my $conf = $self->{handlers}{$phase};
+ unless ($conf) {
+ return unless $init;
+ require HTTP::Config;
+ $conf = $self->{handlers}{$phase} = HTTP::Config->new;
+ }
+ $spec{owner} = (caller(1))[3] unless exists $spec{owner};
+ my @h = $conf->find(%spec);
+ if (!@h && $init) {
+ if (ref($init) eq "CODE") {
+ $init->(\%spec);
+ }
+ elsif (ref($init) eq "HASH") {
+ while (my($k, $v) = each %$init) {
+ $spec{$k} = $v;
+ }
+ }
+ $spec{callback} ||= sub {};
+ $spec{line} ||= join(":", (caller)[1,2]);
+ $conf->add(\%spec);
+ return \%spec;
+ }
+ return wantarray ? @h : $h[0];
+}
+
+sub remove_handler {
+ my($self, $phase, %spec) = @_;
+ if ($phase) {
+ my $conf = $self->{handlers}{$phase} || return;
+ my @h = $conf->remove(%spec);
+ delete $self->{handlers}{$phase} if $conf->empty;
+ return @h;
+ }
+
+ return unless $self->{handlers};
+ return map $self->remove_handler($_), sort keys %{$self->{handlers}};
+}
+
+sub handlers {
+ my($self, $phase, $o) = @_;
+ my @h;
+ if ($o->{handlers} && $o->{handlers}{$phase}) {
+ push(@h, @{$o->{handlers}{$phase}});
+ }
+ if (my $conf = $self->{handlers}{$phase}) {
+ push(@h, $conf->matching($o));
+ }
+ return @h;
+}
+
+sub run_handlers {
+ my($self, $phase, $o) = @_;
+ if (defined(wantarray)) {
+ for my $h ($self->handlers($phase, $o)) {
+ my $ret = $h->{callback}->($o, $self, $h);
+ return $ret if $ret;
+ }
+ return undef;
+ }
+
+ for my $h ($self->handlers($phase, $o)) {
+ $h->{callback}->($o, $self, $h);
+ }
+}
+
+
+# depreciated
+sub use_eval { shift->_elem('use_eval', @_); }
+sub use_alarm
+{
+ Carp::carp("LWP::UserAgent->use_alarm(BOOL) is a no-op")
+ if @_ > 1 && $^W;
+ "";
+}
+
+
+sub clone
+{
+ my $self = shift;
+ my $copy = bless { %$self }, ref $self; # copy most fields
+
+ delete $copy->{handlers};
+ delete $copy->{conn_cache};
+
+ # copy any plain arrays and hashes; known not to need recursive copy
+ for my $k (qw(proxy no_proxy requests_redirectable)) {
+ next unless $copy->{$k};
+ if (ref($copy->{$k}) eq "ARRAY") {
+ $copy->{$k} = [ @{$copy->{$k}} ];
+ }
+ elsif (ref($copy->{$k}) eq "HASH") {
+ $copy->{$k} = { %{$copy->{$k}} };
+ }
+ }
+
+ if ($self->{def_headers}) {
+ $copy->{def_headers} = $self->{def_headers}->clone;
+ }
+
+ # re-enable standard handlers
+ $copy->parse_head($self->parse_head);
+
+ # no easy way to clone the cookie jar; so let's just remove it for now
+ $copy->cookie_jar(undef);
+
+ $copy;
+}
+
+
+sub mirror
+{
+ my($self, $url, $file) = @_;
+
+ my $request = HTTP::Request->new('GET', $url);
+
+ # If the file exists, add a cache-related header
+ if ( -e $file ) {
+ my ($mtime) = ( stat($file) )[9];
+ if ($mtime) {
+ $request->header( 'If-Modified-Since' => HTTP::Date::time2str($mtime) );
+ }
+ }
+ my $tmpfile = "$file-$$";
+
+ my $response = $self->request($request, $tmpfile);
+ if ( $response->header('X-Died') ) {
+ die $response->header('X-Died');
+ }
+
+ # Only fetching a fresh copy of the would be considered success.
+ # If the file was not modified, "304" would returned, which
+ # is considered by HTTP::Status to be a "redirect", /not/ "success"
+ if ( $response->is_success ) {
+ my @stat = stat($tmpfile) or die "Could not stat tmpfile '$tmpfile': $!";
+ my $file_length = $stat[7];
+ my ($content_length) = $response->header('Content-length');
+
+ if ( defined $content_length and $file_length < $content_length ) {
+ unlink($tmpfile);
+ die "Transfer truncated: " . "only $file_length out of $content_length bytes received\n";
+ }
+ elsif ( defined $content_length and $file_length > $content_length ) {
+ unlink($tmpfile);
+ die "Content-length mismatch: " . "expected $content_length bytes, got $file_length\n";
+ }
+ # The file was the expected length.
+ else {
+ # Replace the stale file with a fresh copy
+ if ( -e $file ) {
+ # Some dosish systems fail to rename if the target exists
+ chmod 0777, $file;
+ unlink $file;
+ }
+ rename( $tmpfile, $file )
+ or die "Cannot rename '$tmpfile' to '$file': $!\n";
+
+ # make sure the file has the same last modification time
+ if ( my $lm = $response->last_modified ) {
+ utime $lm, $lm, $file;
+ }
+ }
+ }
+ # The local copy is fresh enough, so just delete the temp file
+ else {
+ unlink($tmpfile);
+ }
+ return $response;
+}
+
+
+sub _need_proxy {
+ my($req, $ua) = @_;
+ return if exists $req->{proxy};
+ my $proxy = $ua->{proxy}{$req->uri->scheme} || return;
+ if ($ua->{no_proxy}) {
+ if (my $host = eval { $req->uri->host }) {
+ for my $domain (@{$ua->{no_proxy}}) {
+ if ($host =~ /\Q$domain\E$/) {
+ return;
+ }
+ }
+ }
+ }
+ $req->{proxy} = $HTTP::URI_CLASS->new($proxy);
+}
+
+
+sub proxy
+{
+ my $self = shift;
+ my $key = shift;
+ return map $self->proxy($_, @_), @$key if ref $key;
+
+ Carp::croak("'$key' is not a valid URI scheme") unless $key =~ /^$URI::scheme_re\z/;
+ my $old = $self->{'proxy'}{$key};
+ if (@_) {
+ my $url = shift;
+ if (defined($url) && length($url)) {
+ Carp::croak("Proxy must be specified as absolute URI; '$url' is not") unless $url =~ /^$URI::scheme_re:/;
+ Carp::croak("Bad http proxy specification '$url'") if $url =~ /^https?:/ && $url !~ m,^https?://\w,;
+ }
+ $self->{proxy}{$key} = $url;
+ $self->set_my_handler("request_preprepare", \&_need_proxy)
+ }
+ return $old;
+}
+
+
+sub env_proxy {
+ my ($self) = @_;
+ my($k,$v);
+ while(($k, $v) = each %ENV) {
+ if ($ENV{REQUEST_METHOD}) {
+ # Need to be careful when called in the CGI environment, as
+ # the HTTP_PROXY variable is under control of that other guy.
+ next if $k =~ /^HTTP_/;
+ $k = "HTTP_PROXY" if $k eq "CGI_HTTP_PROXY";
+ }
+ $k = lc($k);
+ next unless $k =~ /^(.*)_proxy$/;
+ $k = $1;
+ if ($k eq 'no') {
+ $self->no_proxy(split(/\s*,\s*/, $v));
+ }
+ else {
+ # Ignore random _proxy variables, allow only valid schemes
+ next unless $k =~ /^$URI::scheme_re\z/;
+ # Ignore xxx_proxy variables if xxx isn't a supported protocol
+ next unless LWP::Protocol::implementor($k);
+ $self->proxy($k, $v);
+ }
+ }
+}
+
+
+sub no_proxy {
+ my($self, @no) = @_;
+ if (@no) {
+ push(@{ $self->{'no_proxy'} }, @no);
+ }
+ else {
+ $self->{'no_proxy'} = [];
+ }
+}
+
+
+sub _new_response {
+ my($request, $code, $message) = @_;
+ my $response = HTTP::Response->new($code, $message);
+ $response->request($request);
+ $response->header("Client-Date" => HTTP::Date::time2str(time));
+ $response->header("Client-Warning" => "Internal response");
+ $response->header("Content-Type" => "text/plain");
+ $response->content("$code $message\n");
+ return $response;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWP::UserAgent - Web user agent class
+
+=head1 SYNOPSIS
+
+ require LWP::UserAgent;
+
+ my $ua = LWP::UserAgent->new;
+ $ua->timeout(10);
+ $ua->env_proxy;
+
+ my $response = $ua->get('http://search.cpan.org/');
+
+ if ($response->is_success) {
+ print $response->decoded_content; # or whatever
+ }
+ else {
+ die $response->status_line;
+ }
+
+=head1 DESCRIPTION
+
+The C<LWP::UserAgent> is a class implementing a web user agent.
+C<LWP::UserAgent> objects can be used to dispatch web requests.
+
+In normal use the application creates an C<LWP::UserAgent> object, and
+then configures it with values for timeouts, proxies, name, etc. It
+then creates an instance of C<HTTP::Request> for the request that
+needs to be performed. This request is then passed to one of the
+request method the UserAgent, which dispatches it using the relevant
+protocol, and returns a C<HTTP::Response> object. There are
+convenience methods for sending the most common request types: get(),
+head() and post(). When using these methods then the creation of the
+request object is hidden as shown in the synopsis above.
+
+The basic approach of the library is to use HTTP style communication
+for all protocol schemes. This means that you will construct
+C<HTTP::Request> objects and receive C<HTTP::Response> objects even
+for non-HTTP resources like I<gopher> and I<ftp>. In order to achieve
+even more similarity to HTTP style communications, gopher menus and
+file directories are converted to HTML documents.
+
+=head1 CONSTRUCTOR METHODS
+
+The following constructor methods are available:
+
+=over 4
+
+=item $ua = LWP::UserAgent->new( %options )
+
+This method constructs a new C<LWP::UserAgent> object and returns it.
+Key/value pair arguments may be provided to set up the initial state.
+The following options correspond to attribute methods described below:
+
+ KEY DEFAULT
+ ----------- --------------------
+ agent "libwww-perl/#.###"
+ from undef
+ conn_cache undef
+ cookie_jar undef
+ default_headers HTTP::Headers->new
+ local_address undef
+ max_size undef
+ max_redirect 7
+ parse_head 1
+ protocols_allowed undef
+ protocols_forbidden undef
+ requests_redirectable ['GET', 'HEAD']
+ timeout 180
+
+The following additional options are also accepted: If the
+C<env_proxy> option is passed in with a TRUE value, then proxy
+settings are read from environment variables (see env_proxy() method
+below). If the C<keep_alive> option is passed in, then a
+C<LWP::ConnCache> is set up (see conn_cache() method below). The
+C<keep_alive> value is passed on as the C<total_capacity> for the
+connection cache.
+
+=item $ua->clone
+
+Returns a copy of the LWP::UserAgent object.
+
+=back
+
+=head1 ATTRIBUTES
+
+The settings of the configuration attributes modify the behaviour of the
+C<LWP::UserAgent> when it dispatches requests. Most of these can also
+be initialized by options passed to the constructor method.
+
+The following attribute methods are provided. The attribute value is
+left unchanged if no argument is given. The return value from each
+method is the old attribute value.
+
+=over
+
+=item $ua->agent
+
+=item $ua->agent( $product_id )
+
+Get/set the product token that is used to identify the user agent on
+the network. The agent value is sent as the "User-Agent" header in
+the requests. The default is the string returned by the _agent()
+method (see below).
+
+If the $product_id ends with space then the _agent() string is
+appended to it.
+
+The user agent string should be one or more simple product identifiers
+with an optional version number separated by the "/" character.
+Examples are:
+
+ $ua->agent('Checkbot/0.4 ' . $ua->_agent);
+ $ua->agent('Checkbot/0.4 '); # same as above
+ $ua->agent('Mozilla/5.0');
+ $ua->agent(""); # don't identify
+
+=item $ua->_agent
+
+Returns the default agent identifier. This is a string of the form
+"libwww-perl/#.###", where "#.###" is substituted with the version number
+of this library.
+
+=item $ua->from
+
+=item $ua->from( $email_address )
+
+Get/set the e-mail address for the human user who controls
+the requesting user agent. The address should be machine-usable, as
+defined in RFC 822. The C<from> value is send as the "From" header in
+the requests. Example:
+
+ $ua->from('gaas@cpan.org');
+
+The default is to not send a "From" header. See the default_headers()
+method for the more general interface that allow any header to be defaulted.
+
+=item $ua->cookie_jar
+
+=item $ua->cookie_jar( $cookie_jar_obj )
+
+Get/set the cookie jar object to use. The only requirement is that
+the cookie jar object must implement the extract_cookies($request) and
+add_cookie_header($response) methods. These methods will then be
+invoked by the user agent as requests are sent and responses are
+received. Normally this will be a C<HTTP::Cookies> object or some
+subclass.
+
+The default is to have no cookie_jar, i.e. never automatically add
+"Cookie" headers to the requests.
+
+Shortcut: If a reference to a plain hash is passed in as the
+$cookie_jar_object, then it is replaced with an instance of
+C<HTTP::Cookies> that is initialized based on the hash. This form also
+automatically loads the C<HTTP::Cookies> module. It means that:
+
+ $ua->cookie_jar({ file => "$ENV{HOME}/.cookies.txt" });
+
+is really just a shortcut for:
+
+ require HTTP::Cookies;
+ $ua->cookie_jar(HTTP::Cookies->new(file => "$ENV{HOME}/.cookies.txt"));
+
+=item $ua->default_headers
+
+=item $ua->default_headers( $headers_obj )
+
+Get/set the headers object that will provide default header values for
+any requests sent. By default this will be an empty C<HTTP::Headers>
+object.
+
+=item $ua->default_header( $field )
+
+=item $ua->default_header( $field => $value )
+
+This is just a short-cut for $ua->default_headers->header( $field =>
+$value ). Example:
+
+ $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
+ $ua->default_header('Accept-Language' => "no, en");
+
+=item $ua->conn_cache
+
+=item $ua->conn_cache( $cache_obj )
+
+Get/set the C<LWP::ConnCache> object to use. See L<LWP::ConnCache>
+for details.
+
+=item $ua->credentials( $netloc, $realm )
+
+=item $ua->credentials( $netloc, $realm, $uname, $pass )
+
+Get/set the user name and password to be used for a realm.
+
+The $netloc is a string of the form "<host>:<port>". The username and
+password will only be passed to this server. Example:
+
+ $ua->credentials("www.example.com:80", "Some Realm", "foo", "secret");
+
+=item $ua->local_address
+
+=item $ua->local_address( $address )
+
+Get/set the local interface to bind to for network connections. The interface
+can be specified as a hostname or an IP address. This value is passed as the
+C<LocalAddr> argument to L<IO::Socket::INET>.
+
+=item $ua->max_size
+
+=item $ua->max_size( $bytes )
+
+Get/set the size limit for response content. The default is C<undef>,
+which means that there is no limit. If the returned response content
+is only partial, because the size limit was exceeded, then a
+"Client-Aborted" header will be added to the response. The content
+might end up longer than C<max_size> as we abort once appending a
+chunk of data makes the length exceed the limit. The "Content-Length"
+header, if present, will indicate the length of the full content and
+will normally not be the same as C<< length($res->content) >>.
+
+=item $ua->max_redirect
+
+=item $ua->max_redirect( $n )
+
+This reads or sets the object's limit of how many times it will obey
+redirection responses in a given request cycle.
+
+By default, the value is 7. This means that if you call request()
+method and the response is a redirect elsewhere which is in turn a
+redirect, and so on seven times, then LWP gives up after that seventh
+request.
+
+=item $ua->parse_head
+
+=item $ua->parse_head( $boolean )
+
+Get/set a value indicating whether we should initialize response
+headers from the E<lt>head> section of HTML documents. The default is
+TRUE. Do not turn this off, unless you know what you are doing.
+
+=item $ua->protocols_allowed
+
+=item $ua->protocols_allowed( \@protocols )
+
+This reads (or sets) this user agent's list of protocols that the
+request methods will exclusively allow. The protocol names are case
+insensitive.
+
+For example: C<$ua-E<gt>protocols_allowed( [ 'http', 'https'] );>
+means that this user agent will I<allow only> those protocols,
+and attempts to use this user agent to access URLs with any other
+schemes (like "ftp://...") will result in a 500 error.
+
+To delete the list, call: C<$ua-E<gt>protocols_allowed(undef)>
+
+By default, an object has neither a C<protocols_allowed> list, nor a
+C<protocols_forbidden> list.
+
+Note that having a C<protocols_allowed> list causes any
+C<protocols_forbidden> list to be ignored.
+
+=item $ua->protocols_forbidden
+
+=item $ua->protocols_forbidden( \@protocols )
+
+This reads (or sets) this user agent's list of protocols that the
+request method will I<not> allow. The protocol names are case
+insensitive.
+
+For example: C<$ua-E<gt>protocols_forbidden( [ 'file', 'mailto'] );>
+means that this user agent will I<not> allow those protocols, and
+attempts to use this user agent to access URLs with those schemes
+will result in a 500 error.
+
+To delete the list, call: C<$ua-E<gt>protocols_forbidden(undef)>
+
+=item $ua->requests_redirectable
+
+=item $ua->requests_redirectable( \@requests )
+
+This reads or sets the object's list of request names that
+C<$ua-E<gt>redirect_ok(...)> will allow redirection for. By
+default, this is C<['GET', 'HEAD']>, as per RFC 2616. To
+change to include 'POST', consider:
+
+ push @{ $ua->requests_redirectable }, 'POST';
+
+=item $ua->show_progress
+
+=item $ua->show_progress( $boolean )
+
+Get/set a value indicating whether a progress bar should be displayed
+on on the terminal as requests are processed. The default is FALSE.
+
+=item $ua->timeout
+
+=item $ua->timeout( $secs )
+
+Get/set the timeout value in seconds. The default timeout() value is
+180 seconds, i.e. 3 minutes.
+
+The requests is aborted if no activity on the connection to the server
+is observed for C<timeout> seconds. This means that the time it takes
+for the complete transaction and the request() method to actually
+return might be longer.
+
+=back
+
+=head2 Proxy attributes
+
+The following methods set up when requests should be passed via a
+proxy server.
+
+=over
+
+=item $ua->proxy(\@schemes, $proxy_url)
+
+=item $ua->proxy($scheme, $proxy_url)
+
+Set/retrieve proxy URL for a scheme:
+
+ $ua->proxy(['http', 'ftp'], 'http://proxy.sn.no:8001/');
+ $ua->proxy('gopher', 'http://proxy.sn.no:8001/');
+
+The first form specifies that the URL is to be used for proxying of
+access methods listed in the list in the first method argument,
+i.e. 'http' and 'ftp'.
+
+The second form shows a shorthand form for specifying
+proxy URL for a single access scheme.
+
+=item $ua->no_proxy( $domain, ... )
+
+Do not proxy requests to the given domains. Calling no_proxy without
+any domains clears the list of domains. Eg:
+
+ $ua->no_proxy('localhost', 'example.com');
+
+=item $ua->env_proxy
+
+Load proxy settings from *_proxy environment variables. You might
+specify proxies like this (sh-syntax):
+
+ gopher_proxy=http://proxy.my.place/
+ wais_proxy=http://proxy.my.place/
+ no_proxy="localhost,example.com"
+ export gopher_proxy wais_proxy no_proxy
+
+csh or tcsh users should use the C<setenv> command to define these
+environment variables.
+
+On systems with case insensitive environment variables there exists a
+name clash between the CGI environment variables and the C<HTTP_PROXY>
+environment variable normally picked up by env_proxy(). Because of
+this C<HTTP_PROXY> is not honored for CGI scripts. The
+C<CGI_HTTP_PROXY> environment variable can be used instead.
+
+=back
+
+=head2 Handlers
+
+Handlers are code that injected at various phases during the
+processing of requests. The following methods are provided to manage
+the active handlers:
+
+=over
+
+=item $ua->add_handler( $phase => \&cb, %matchspec )
+
+Add handler to be invoked in the given processing phase. For how to
+specify %matchspec see L<HTTP::Config/"Matching">.
+
+The possible values $phase and the corresponding callback signatures are:
+
+=over
+
+=item request_preprepare => sub { my($request, $ua, $h) = @_; ... }
+
+The handler is called before the C<request_prepare> and other standard
+initialization of of the request. This can be used to set up headers
+and attributes that the C<request_prepare> handler depends on. Proxy
+initialization should take place here; but in general don't register
+handlers for this phase.
+
+=item request_prepare => sub { my($request, $ua, $h) = @_; ... }
+
+The handler is called before the request is sent and can modify the
+request any way it see fit. This can for instance be used to add
+certain headers to specific requests.
+
+The method can assign a new request object to $_[0] to replace the
+request that is sent fully.
+
+The return value from the callback is ignored. If an exceptions is
+raised it will abort the request and make the request method return a
+"400 Bad request" response.
+
+=item request_send => sub { my($request, $ua, $h) = @_; ... }
+
+This handler get a chance of handling requests before it's sent to the
+protocol handlers. It should return an HTTP::Response object if it
+wishes to terminate the processing; otherwise it should return nothing.
+
+The C<response_header> and C<response_data> handlers will not be
+invoked for this response, but the C<response_done> will be.
+
+=item response_header => sub { my($response, $ua, $h) = @_; ... }
+
+This handler is called right after the response headers have been
+received, but before any content data. The handler might set up
+handlers for data and might croak to abort the request.
+
+The handler might set the $response->{default_add_content} value to
+control if any received data should be added to the response object
+directly. This will initially be false if the $ua->request() method
+was called with a $content_file or $content_cb argument; otherwise true.
+
+=item response_data => sub { my($response, $ua, $h, $data) = @_; ... }
+
+This handlers is called for each chunk of data received for the
+response. The handler might croak to abort the request.
+
+This handler need to return a TRUE value to be called again for
+subsequent chunks for the same request.
+
+=item response_done => sub { my($response, $ua, $h) = @_; ... }
+
+The handler is called after the response has been fully received, but
+before any redirect handling is attempted. The handler can be used to
+extract information or modify the response.
+
+=item response_redirect => sub { my($response, $ua, $h) = @_; ... }
+
+The handler is called in $ua->request after C<response_done>. If the
+handler return an HTTP::Request object we'll start over with processing
+this request instead.
+
+=back
+
+=item $ua->remove_handler( undef, %matchspec )
+
+=item $ua->remove_handler( $phase, %matchspec )
+
+Remove handlers that match the given %matchspec. If $phase is not
+provided remove handlers from all phases.
+
+Be careful as calling this function with %matchspec that is not not
+specific enough can remove handlers not owned by you. It's probably
+better to use the set_my_handler() method instead.
+
+The removed handlers are returned.
+
+=item $ua->set_my_handler( $phase, $cb, %matchspec )
+
+Set handlers private to the executing subroutine. Works by defaulting
+an C<owner> field to the %matchspec that holds the name of the called
+subroutine. You might pass an explicit C<owner> to override this.
+
+If $cb is passed as C<undef>, remove the handler.
+
+=item $ua->get_my_handler( $phase, %matchspec )
+
+=item $ua->get_my_handler( $phase, %matchspec, $init )
+
+Will retrieve the matching handler as hash ref.
+
+If C<$init> is passed passed as a TRUE value, create and add the
+handler if it's not found. If $init is a subroutine reference, then
+it's called with the created handler hash as argument. This sub might
+populate the hash with extra fields; especially the callback. If
+$init is a hash reference, merge the hashes.
+
+=item $ua->handlers( $phase, $request )
+
+=item $ua->handlers( $phase, $response )
+
+Returns the handlers that apply to the given request or response at
+the given processing phase.
+
+=back
+
+=head1 REQUEST METHODS
+
+The methods described in this section are used to dispatch requests
+via the user agent. The following request methods are provided:
+
+=over
+
+=item $ua->get( $url )
+
+=item $ua->get( $url , $field_name => $value, ... )
+
+This method will dispatch a C<GET> request on the given $url. Further
+arguments can be given to initialize the headers of the request. These
+are given as separate name/value pairs. The return value is a
+response object. See L<HTTP::Response> for a description of the
+interface it provides.
+
+There will still be a response object returned when LWP can't connect to the
+server specified in the URL or when other failures in protocol handlers occur.
+These internal responses use the standard HTTP status codes, so the responses
+can't be differentiated by testing the response status code alone. Error
+responses that LWP generates internally will have the "Client-Warning" header
+set to the value "Internal response". If you need to differentiate these
+internal responses from responses that a remote server actually generates, you
+need to test this header value.
+
+Fields names that start with ":" are special. These will not
+initialize headers of the request but will determine how the response
+content is treated. The following special field names are recognized:
+
+ :content_file => $filename
+ :content_cb => \&callback
+ :read_size_hint => $bytes
+
+If a $filename is provided with the C<:content_file> option, then the
+response content will be saved here instead of in the response
+object. If a callback is provided with the C<:content_cb> option then
+this function will be called for each chunk of the response content as
+it is received from the server. If neither of these options are
+given, then the response content will accumulate in the response
+object itself. This might not be suitable for very large response
+bodies. Only one of C<:content_file> or C<:content_cb> can be
+specified. The content of unsuccessful responses will always
+accumulate in the response object itself, regardless of the
+C<:content_file> or C<:content_cb> options passed in.
+
+The C<:read_size_hint> option is passed to the protocol module which
+will try to read data from the server in chunks of this size. A
+smaller value for the C<:read_size_hint> will result in a higher
+number of callback invocations.
+
+The callback function is called with 3 arguments: a chunk of data, a
+reference to the response object, and a reference to the protocol
+object. The callback can abort the request by invoking die(). The
+exception message will show up as the "X-Died" header field in the
+response returned by the get() function.
+
+=item $ua->head( $url )
+
+=item $ua->head( $url , $field_name => $value, ... )
+
+This method will dispatch a C<HEAD> request on the given $url.
+Otherwise it works like the get() method described above.
+
+=item $ua->post( $url, \%form )
+
+=item $ua->post( $url, \@form )
+
+=item $ua->post( $url, \%form, $field_name => $value, ... )
+
+=item $ua->post( $url, $field_name => $value,... Content => \%form )
+
+=item $ua->post( $url, $field_name => $value,... Content => \@form )
+
+=item $ua->post( $url, $field_name => $value,... Content => $content )
+
+This method will dispatch a C<POST> request on the given $url, with
+%form or @form providing the key/value pairs for the fill-in form
+content. Additional headers and content options are the same as for
+the get() method.
+
+This method will use the POST() function from C<HTTP::Request::Common>
+to build the request. See L<HTTP::Request::Common> for a details on
+how to pass form content and other advanced features.
+
+=item $ua->mirror( $url, $filename )
+
+This method will get the document identified by $url and store it in
+file called $filename. If the file already exists, then the request
+will contain an "If-Modified-Since" header matching the modification
+time of the file. If the document on the server has not changed since
+this time, then nothing happens. If the document has been updated, it
+will be downloaded again. The modification time of the file will be
+forced to match that of the server.
+
+The return value is the the response object.
+
+=item $ua->request( $request )
+
+=item $ua->request( $request, $content_file )
+
+=item $ua->request( $request, $content_cb )
+
+=item $ua->request( $request, $content_cb, $read_size_hint )
+
+This method will dispatch the given $request object. Normally this
+will be an instance of the C<HTTP::Request> class, but any object with
+a similar interface will do. The return value is a response object.
+See L<HTTP::Request> and L<HTTP::Response> for a description of the
+interface provided by these classes.
+
+The request() method will process redirects and authentication
+responses transparently. This means that it may actually send several
+simple requests via the simple_request() method described below.
+
+The request methods described above; get(), head(), post() and
+mirror(), will all dispatch the request they build via this method.
+They are convenience methods that simply hides the creation of the
+request object for you.
+
+The $content_file, $content_cb and $read_size_hint all correspond to
+options described with the get() method above.
+
+You are allowed to use a CODE reference as C<content> in the request
+object passed in. The C<content> function should return the content
+when called. The content can be returned in chunks. The content
+function will be invoked repeatedly until it return an empty string to
+signal that there is no more content.
+
+=item $ua->simple_request( $request )
+
+=item $ua->simple_request( $request, $content_file )
+
+=item $ua->simple_request( $request, $content_cb )
+
+=item $ua->simple_request( $request, $content_cb, $read_size_hint )
+
+This method dispatches a single request and returns the response
+received. Arguments are the same as for request() described above.
+
+The difference from request() is that simple_request() will not try to
+handle redirects or authentication responses. The request() method
+will in fact invoke this method for each simple request it sends.
+
+=item $ua->is_protocol_supported( $scheme )
+
+You can use this method to test whether this user agent object supports the
+specified C<scheme>. (The C<scheme> might be a string (like 'http' or
+'ftp') or it might be an URI object reference.)
+
+Whether a scheme is supported, is determined by the user agent's
+C<protocols_allowed> or C<protocols_forbidden> lists (if any), and by
+the capabilities of LWP. I.e., this will return TRUE only if LWP
+supports this protocol I<and> it's permitted for this particular
+object.
+
+=back
+
+=head2 Callback methods
+
+The following methods will be invoked as requests are processed. These
+methods are documented here because subclasses of C<LWP::UserAgent>
+might want to override their behaviour.
+
+=over
+
+=item $ua->prepare_request( $request )
+
+This method is invoked by simple_request(). Its task is to modify the
+given $request object by setting up various headers based on the
+attributes of the user agent. The return value should normally be the
+$request object passed in. If a different request object is returned
+it will be the one actually processed.
+
+The headers affected by the base implementation are; "User-Agent",
+"From", "Range" and "Cookie".
+
+=item $ua->redirect_ok( $prospective_request, $response )
+
+This method is called by request() before it tries to follow a
+redirection to the request in $response. This should return a TRUE
+value if this redirection is permissible. The $prospective_request
+will be the request to be sent if this method returns TRUE.
+
+The base implementation will return FALSE unless the method
+is in the object's C<requests_redirectable> list,
+FALSE if the proposed redirection is to a "file://..."
+URL, and TRUE otherwise.
+
+=item $ua->get_basic_credentials( $realm, $uri, $isproxy )
+
+This is called by request() to retrieve credentials for documents
+protected by Basic or Digest Authentication. The arguments passed in
+is the $realm provided by the server, the $uri requested and a boolean
+flag to indicate if this is authentication against a proxy server.
+
+The method should return a username and password. It should return an
+empty list to abort the authentication resolution attempt. Subclasses
+can override this method to prompt the user for the information. An
+example of this can be found in C<lwp-request> program distributed
+with this library.
+
+The base implementation simply checks a set of pre-stored member
+variables, set up with the credentials() method.
+
+=item $ua->progress( $status, $request_or_response )
+
+This is called frequently as the response is received regardless of
+how the content is processed. The method is called with $status
+"begin" at the start of processing the request and with $state "end"
+before the request method returns. In between these $status will be
+the fraction of the response currently received or the string "tick"
+if the fraction can't be calculated.
+
+When $status is "begin" the second argument is the request object,
+otherwise it is the response object.
+
+=back
+
+=head1 SEE ALSO
+
+See L<LWP> for a complete overview of libwww-perl5. See L<lwpcook>
+and the scripts F<lwp-request> and F<lwp-download> for examples of
+usage.
+
+See L<HTTP::Request> and L<HTTP::Response> for a description of the
+message objects dispatched and received. See L<HTTP::Request::Common>
+and L<HTML::Form> for other ways to build request objects.
+
+See L<WWW::Mechanize> and L<WWW::Search> for examples of more
+specialized user agents based on C<LWP::UserAgent>.
+
+=head1 COPYRIGHT
+
+Copyright 1995-2009 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
--- /dev/null
+# This is a comment. I love comments.
+
+# This file controls what Internet media types are sent to the client for
+# given file extension(s). Sending the correct media type to the client
+# is important so they know how to handle the content of the file.
+# For more information about Internet media types, please read RFC 2045,
+# 2046, 2047, 2048, and 2077. The Internet media type registry is
+# at <http://www.iana.org/assignments/media-types/>.
+
+# MIME type Extensions
+application/activemessage
+application/andrew-inset ez
+application/applefile
+application/atom+xml atom
+application/atomcat+xml atomcat
+application/atomicmail
+application/atomsvc+xml atomsvc
+application/auth-policy+xml
+application/batch-smtp
+application/beep+xml
+application/cals-1840
+application/ccxml+xml ccxml
+application/cellml+xml
+application/cnrp+xml
+application/commonground
+application/conference-info+xml
+application/cpl+xml
+application/csta+xml
+application/cstadata+xml
+application/cybercash
+application/davmount+xml davmount
+application/dca-rft
+application/dec-dx
+application/dialog-info+xml
+application/dicom
+application/dns
+application/dvcs
+application/ecmascript ecma
+application/edi-consent
+application/edi-x12
+application/edifact
+application/epp+xml
+application/eshop
+application/fastinfoset
+application/fastsoap
+application/fits
+application/font-tdpfr pfr
+application/h224
+application/http
+application/hyperstudio stk
+application/iges
+application/im-iscomposing+xml
+application/index
+application/index.cmd
+application/index.obj
+application/index.response
+application/index.vnd
+application/iotp
+application/ipp
+application/isup
+application/javascript js
+application/json json
+application/kpml-request+xml
+application/kpml-response+xml
+application/lost+xml lostxml
+application/mac-binhex40 hqx
+application/mac-compactpro cpt
+application/macwriteii
+application/marc mrc
+application/mathematica ma nb mb
+application/mathml+xml mathml
+application/mbms-associated-procedure-description+xml
+application/mbms-deregister+xml
+application/mbms-envelope+xml
+application/mbms-msk+xml
+application/mbms-msk-response+xml
+application/mbms-protection-description+xml
+application/mbms-reception-report+xml
+application/mbms-register+xml
+application/mbms-register-response+xml
+application/mbms-user-service-description+xml
+application/mbox mbox
+application/media_control+xml
+application/mediaservercontrol+xml mscml
+application/mikey
+application/moss-keys
+application/moss-signature
+application/mosskey-data
+application/mosskey-request
+application/mp4 mp4s
+application/mpeg4-generic
+application/mpeg4-iod
+application/mpeg4-iod-xmt
+application/msword doc dot
+application/mxf mxf
+application/nasdata
+application/news-transmission
+application/nss
+application/ocsp-request
+application/ocsp-response
+application/octet-stream bin dms lha lzh class so iso dmg dist distz pkg bpk dump elc
+application/oda oda
+application/oebps-package+xml
+application/ogg ogx
+application/parityfec
+application/patch-ops-error+xml xer
+application/pdf pdf
+application/pgp-encrypted pgp
+application/pgp-keys
+application/pgp-signature asc sig
+application/pics-rules prf
+application/pidf+xml
+application/pidf-diff+xml
+application/pkcs10 p10
+application/pkcs7-mime p7m p7c
+application/pkcs7-signature p7s
+application/pkix-cert cer
+application/pkix-crl crl
+application/pkix-pkipath pkipath
+application/pkixcmp pki
+application/pls+xml pls
+application/poc-settings+xml
+application/postscript ai eps ps
+application/prs.alvestrand.titrax-sheet
+application/prs.cww cww
+application/prs.nprend
+application/prs.plucker
+application/qsig
+application/rdf+xml rdf
+application/reginfo+xml rif
+application/relax-ng-compact-syntax rnc
+application/remote-printing
+application/resource-lists+xml rl
+application/resource-lists-diff+xml rld
+application/riscos
+application/rlmi+xml
+application/rls-services+xml rs
+application/rsd+xml rsd
+application/rss+xml rss
+application/rtf rtf
+application/rtx
+application/samlassertion+xml
+application/samlmetadata+xml
+application/sbml+xml sbml
+application/scvp-cv-request scq
+application/scvp-cv-response scs
+application/scvp-vp-request spq
+application/scvp-vp-response spp
+application/sdp sdp
+application/set-payment
+application/set-payment-initiation setpay
+application/set-registration
+application/set-registration-initiation setreg
+application/sgml
+application/sgml-open-catalog
+application/shf+xml shf
+application/sieve
+application/simple-filter+xml
+application/simple-message-summary
+application/simplesymbolcontainer
+application/slate
+application/smil
+application/smil+xml smi smil
+application/soap+fastinfoset
+application/soap+xml
+application/sparql-query rq
+application/sparql-results+xml srx
+application/spirits-event+xml
+application/srgs gram
+application/srgs+xml grxml
+application/ssml+xml ssml
+application/timestamp-query
+application/timestamp-reply
+application/tve-trigger
+application/ulpfec
+application/vemmi
+application/vividence.scriptfile
+application/vnd.3gpp.bsf+xml
+application/vnd.3gpp.pic-bw-large plb
+application/vnd.3gpp.pic-bw-small psb
+application/vnd.3gpp.pic-bw-var pvb
+application/vnd.3gpp.sms
+application/vnd.3gpp2.bcmcsinfo+xml
+application/vnd.3gpp2.sms
+application/vnd.3gpp2.tcap tcap
+application/vnd.3m.post-it-notes pwn
+application/vnd.accpac.simply.aso aso
+application/vnd.accpac.simply.imp imp
+application/vnd.acucobol acu
+application/vnd.acucorp atc acutc
+application/vnd.adobe.xdp+xml xdp
+application/vnd.adobe.xfdf xfdf
+application/vnd.aether.imp
+application/vnd.americandynamics.acc acc
+application/vnd.amiga.ami ami
+application/vnd.anser-web-certificate-issue-initiation cii
+application/vnd.anser-web-funds-transfer-initiation fti
+application/vnd.antix.game-component atx
+application/vnd.apple.installer+xml mpkg
+application/vnd.arastra.swi swi
+application/vnd.audiograph aep
+application/vnd.autopackage
+application/vnd.avistar+xml
+application/vnd.blueice.multipass mpm
+application/vnd.bmi bmi
+application/vnd.businessobjects rep
+application/vnd.cab-jscript
+application/vnd.canon-cpdl
+application/vnd.canon-lips
+application/vnd.cendio.thinlinc.clientconf
+application/vnd.chemdraw+xml cdxml
+application/vnd.chipnuts.karaoke-mmd mmd
+application/vnd.cinderella cdy
+application/vnd.cirpack.isdn-ext
+application/vnd.claymore cla
+application/vnd.clonk.c4group c4g c4d c4f c4p c4u
+application/vnd.commerce-battelle
+application/vnd.commonspace csp cst
+application/vnd.contact.cmsg cdbcmsg
+application/vnd.cosmocaller cmc
+application/vnd.crick.clicker clkx
+application/vnd.crick.clicker.keyboard clkk
+application/vnd.crick.clicker.palette clkp
+application/vnd.crick.clicker.template clkt
+application/vnd.crick.clicker.wordbank clkw
+application/vnd.criticaltools.wbs+xml wbs
+application/vnd.ctc-posml pml
+application/vnd.ctct.ws+xml
+application/vnd.cups-pdf
+application/vnd.cups-postscript
+application/vnd.cups-ppd ppd
+application/vnd.cups-raster
+application/vnd.cups-raw
+application/vnd.curl curl
+application/vnd.cybank
+application/vnd.data-vision.rdz rdz
+application/vnd.denovo.fcselayout-link fe_launch
+application/vnd.dna dna
+application/vnd.dolby.mlp mlp
+application/vnd.dpgraph dpg
+application/vnd.dreamfactory dfac
+application/vnd.dvb.esgcontainer
+application/vnd.dvb.ipdcesgaccess
+application/vnd.dvb.iptv.alfec-base
+application/vnd.dvb.iptv.alfec-enhancement
+application/vnd.dxr
+application/vnd.ecdis-update
+application/vnd.ecowin.chart mag
+application/vnd.ecowin.filerequest
+application/vnd.ecowin.fileupdate
+application/vnd.ecowin.series
+application/vnd.ecowin.seriesrequest
+application/vnd.ecowin.seriesupdate
+application/vnd.enliven nml
+application/vnd.epson.esf esf
+application/vnd.epson.msf msf
+application/vnd.epson.quickanime qam
+application/vnd.epson.salt slt
+application/vnd.epson.ssf ssf
+application/vnd.ericsson.quickcall
+application/vnd.eszigno3+xml es3 et3
+application/vnd.eudora.data
+application/vnd.ezpix-album ez2
+application/vnd.ezpix-package ez3
+application/vnd.fdf fdf
+application/vnd.ffsns
+application/vnd.fints
+application/vnd.flographit gph
+application/vnd.fluxtime.clip ftc
+application/vnd.font-fontforge-sfd
+application/vnd.framemaker fm frame maker
+application/vnd.frogans.fnc fnc
+application/vnd.frogans.ltf ltf
+application/vnd.fsc.weblaunch fsc
+application/vnd.fujitsu.oasys oas
+application/vnd.fujitsu.oasys2 oa2
+application/vnd.fujitsu.oasys3 oa3
+application/vnd.fujitsu.oasysgp fg5
+application/vnd.fujitsu.oasysprs bh2
+application/vnd.fujixerox.art-ex
+application/vnd.fujixerox.art4
+application/vnd.fujixerox.hbpl
+application/vnd.fujixerox.ddd ddd
+application/vnd.fujixerox.docuworks xdw
+application/vnd.fujixerox.docuworks.binder xbd
+application/vnd.fut-misnet
+application/vnd.fuzzysheet fzs
+application/vnd.genomatix.tuxedo txd
+application/vnd.gmx gmx
+application/vnd.google-earth.kml+xml kml
+application/vnd.google-earth.kmz kmz
+application/vnd.grafeq gqf gqs
+application/vnd.gridmp
+application/vnd.groove-account gac
+application/vnd.groove-help ghf
+application/vnd.groove-identity-message gim
+application/vnd.groove-injector grv
+application/vnd.groove-tool-message gtm
+application/vnd.groove-tool-template tpl
+application/vnd.groove-vcard vcg
+application/vnd.handheld-entertainment+xml zmm
+application/vnd.hbci hbci
+application/vnd.hcl-bireports
+application/vnd.hhe.lesson-player les
+application/vnd.hp-hpgl hpgl
+application/vnd.hp-hpid hpid
+application/vnd.hp-hps hps
+application/vnd.hp-jlyt jlt
+application/vnd.hp-pcl pcl
+application/vnd.hp-pclxl pclxl
+application/vnd.httphone
+application/vnd.hydrostatix.sof-data sfd-hdstx
+application/vnd.hzn-3d-crossword x3d
+application/vnd.ibm.afplinedata
+application/vnd.ibm.electronic-media
+application/vnd.ibm.minipay mpy
+application/vnd.ibm.modcap afp listafp list3820
+application/vnd.ibm.rights-management irm
+application/vnd.ibm.secure-container sc
+application/vnd.iccprofile icc icm
+application/vnd.igloader igl
+application/vnd.immervision-ivp ivp
+application/vnd.immervision-ivu ivu
+application/vnd.informedcontrol.rms+xml
+application/vnd.intercon.formnet xpw xpx
+application/vnd.intertrust.digibox
+application/vnd.intertrust.nncp
+application/vnd.intu.qbo qbo
+application/vnd.intu.qfx qfx
+application/vnd.iptc.g2.conceptitem+xml
+application/vnd.iptc.g2.knowledgeitem+xml
+application/vnd.iptc.g2.newsitem+xml
+application/vnd.iptc.g2.packageitem+xml
+application/vnd.ipunplugged.rcprofile rcprofile
+application/vnd.irepository.package+xml irp
+application/vnd.is-xpr xpr
+application/vnd.jam jam
+application/vnd.japannet-directory-service
+application/vnd.japannet-jpnstore-wakeup
+application/vnd.japannet-payment-wakeup
+application/vnd.japannet-registration
+application/vnd.japannet-registration-wakeup
+application/vnd.japannet-setstore-wakeup
+application/vnd.japannet-verification
+application/vnd.japannet-verification-wakeup
+application/vnd.jcp.javame.midlet-rms rms
+application/vnd.jisp jisp
+application/vnd.joost.joda-archive joda
+application/vnd.kahootz ktz ktr
+application/vnd.kde.karbon karbon
+application/vnd.kde.kchart chrt
+application/vnd.kde.kformula kfo
+application/vnd.kde.kivio flw
+application/vnd.kde.kontour kon
+application/vnd.kde.kpresenter kpr kpt
+application/vnd.kde.kspread ksp
+application/vnd.kde.kword kwd kwt
+application/vnd.kenameaapp htke
+application/vnd.kidspiration kia
+application/vnd.kinar kne knp
+application/vnd.koan skp skd skt skm
+application/vnd.kodak-descriptor sse
+application/vnd.liberty-request+xml
+application/vnd.llamagraphics.life-balance.desktop lbd
+application/vnd.llamagraphics.life-balance.exchange+xml lbe
+application/vnd.lotus-1-2-3 123
+application/vnd.lotus-approach apr
+application/vnd.lotus-freelance pre
+application/vnd.lotus-notes nsf
+application/vnd.lotus-organizer org
+application/vnd.lotus-screencam scm
+application/vnd.lotus-wordpro lwp
+application/vnd.macports.portpkg portpkg
+application/vnd.marlin.drm.actiontoken+xml
+application/vnd.marlin.drm.conftoken+xml
+application/vnd.marlin.drm.license+xml
+application/vnd.marlin.drm.mdcf
+application/vnd.mcd mcd
+application/vnd.medcalcdata mc1
+application/vnd.mediastation.cdkey cdkey
+application/vnd.meridian-slingshot
+application/vnd.mfer mwf
+application/vnd.mfmp mfm
+application/vnd.micrografx.flo flo
+application/vnd.micrografx.igx igx
+application/vnd.mif mif
+application/vnd.minisoft-hp3000-save
+application/vnd.mitsubishi.misty-guard.trustweb
+application/vnd.mobius.daf daf
+application/vnd.mobius.dis dis
+application/vnd.mobius.mbk mbk
+application/vnd.mobius.mqy mqy
+application/vnd.mobius.msl msl
+application/vnd.mobius.plc plc
+application/vnd.mobius.txf txf
+application/vnd.mophun.application mpn
+application/vnd.mophun.certificate mpc
+application/vnd.motorola.flexsuite
+application/vnd.motorola.flexsuite.adsi
+application/vnd.motorola.flexsuite.fis
+application/vnd.motorola.flexsuite.gotap
+application/vnd.motorola.flexsuite.kmr
+application/vnd.motorola.flexsuite.ttc
+application/vnd.motorola.flexsuite.wem
+application/vnd.motorola.iprm
+application/vnd.mozilla.xul+xml xul
+application/vnd.ms-artgalry cil
+application/vnd.ms-asf asf
+application/vnd.ms-cab-compressed cab
+application/vnd.ms-excel xls xlm xla xlc xlt xlw
+application/vnd.ms-fontobject eot
+application/vnd.ms-htmlhelp chm
+application/vnd.ms-ims ims
+application/vnd.ms-lrm lrm
+application/vnd.ms-playready.initiator+xml
+application/vnd.ms-powerpoint ppt pps pot
+application/vnd.ms-project mpp mpt
+application/vnd.ms-tnef
+application/vnd.ms-wmdrm.lic-chlg-req
+application/vnd.ms-wmdrm.lic-resp
+application/vnd.ms-wmdrm.meter-chlg-req
+application/vnd.ms-wmdrm.meter-resp
+application/vnd.ms-works wps wks wcm wdb
+application/vnd.ms-wpl wpl
+application/vnd.ms-xpsdocument xps
+application/vnd.mseq mseq
+application/vnd.msign
+application/vnd.multiad.creator
+application/vnd.multiad.creator.cif
+application/vnd.music-niff
+application/vnd.musician mus
+application/vnd.muvee.style msty
+application/vnd.ncd.control
+application/vnd.ncd.reference
+application/vnd.nervana
+application/vnd.netfpx
+application/vnd.neurolanguage.nlu nlu
+application/vnd.noblenet-directory nnd
+application/vnd.noblenet-sealer nns
+application/vnd.noblenet-web nnw
+application/vnd.nokia.catalogs
+application/vnd.nokia.conml+wbxml
+application/vnd.nokia.conml+xml
+application/vnd.nokia.isds-radio-presets
+application/vnd.nokia.iptv.config+xml
+application/vnd.nokia.landmark+wbxml
+application/vnd.nokia.landmark+xml
+application/vnd.nokia.landmarkcollection+xml
+application/vnd.nokia.n-gage.ac+xml
+application/vnd.nokia.n-gage.data ngdat
+application/vnd.nokia.n-gage.symbian.install n-gage
+application/vnd.nokia.ncd
+application/vnd.nokia.pcd+wbxml
+application/vnd.nokia.pcd+xml
+application/vnd.nokia.radio-preset rpst
+application/vnd.nokia.radio-presets rpss
+application/vnd.novadigm.edm edm
+application/vnd.novadigm.edx edx
+application/vnd.novadigm.ext ext
+application/vnd.oasis.opendocument.chart odc
+application/vnd.oasis.opendocument.chart-template otc
+application/vnd.oasis.opendocument.formula odf
+application/vnd.oasis.opendocument.formula-template otf
+application/vnd.oasis.opendocument.graphics odg
+application/vnd.oasis.opendocument.graphics-template otg
+application/vnd.oasis.opendocument.image odi
+application/vnd.oasis.opendocument.image-template oti
+application/vnd.oasis.opendocument.presentation odp
+application/vnd.oasis.opendocument.presentation-template otp
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.spreadsheet-template ots
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.text-master otm
+application/vnd.oasis.opendocument.text-template ott
+application/vnd.oasis.opendocument.text-web oth
+application/vnd.obn
+application/vnd.olpc-sugar xo
+application/vnd.oma-scws-config
+application/vnd.oma-scws-http-request
+application/vnd.oma-scws-http-response
+application/vnd.oma.bcast.associated-procedure-parameter+xml
+application/vnd.oma.bcast.drm-trigger+xml
+application/vnd.oma.bcast.imd+xml
+application/vnd.oma.bcast.ltkm
+application/vnd.oma.bcast.notification+xml
+application/vnd.oma.bcast.provisioningtrigger
+application/vnd.oma.bcast.sgboot
+application/vnd.oma.bcast.sgdd+xml
+application/vnd.oma.bcast.sgdu
+application/vnd.oma.bcast.simple-symbol-container
+application/vnd.oma.bcast.smartcard-trigger+xml
+application/vnd.oma.bcast.sprov+xml
+application/vnd.oma.bcast.stkm
+application/vnd.oma.dcd
+application/vnd.oma.dcdc
+application/vnd.oma.dd2+xml dd2
+application/vnd.oma.drm.risd+xml
+application/vnd.oma.group-usage-list+xml
+application/vnd.oma.poc.detailed-progress-report+xml
+application/vnd.oma.poc.final-report+xml
+application/vnd.oma.poc.groups+xml
+application/vnd.oma.poc.invocation-descriptor+xml
+application/vnd.oma.poc.optimized-progress-report+xml
+application/vnd.oma.xcap-directory+xml
+application/vnd.omads-email+xml
+application/vnd.omads-file+xml
+application/vnd.omads-folder+xml
+application/vnd.omaloc-supl-init
+application/vnd.openofficeorg.extension oxt
+application/vnd.osa.netdeploy
+application/vnd.osgi.dp dp
+application/vnd.otps.ct-kip+xml
+application/vnd.palm prc pdb pqa oprc
+application/vnd.paos.xml
+application/vnd.pg.format str
+application/vnd.pg.osasli ei6
+application/vnd.piaccess.application-licence
+application/vnd.picsel efif
+application/vnd.poc.group-advertisement+xml
+application/vnd.pocketlearn plf
+application/vnd.powerbuilder6 pbd
+application/vnd.powerbuilder6-s
+application/vnd.powerbuilder7
+application/vnd.powerbuilder7-s
+application/vnd.powerbuilder75
+application/vnd.powerbuilder75-s
+application/vnd.preminet
+application/vnd.previewsystems.box box
+application/vnd.proteus.magazine mgz
+application/vnd.publishare-delta-tree qps
+application/vnd.pvi.ptid1 ptid
+application/vnd.pwg-multiplexed
+application/vnd.pwg-xhtml-print+xml
+application/vnd.qualcomm.brew-app-res
+application/vnd.quark.quarkxpress qxd qxt qwd qwt qxl qxb
+application/vnd.rapid
+application/vnd.recordare.musicxml mxl
+application/vnd.recordare.musicxml+xml
+application/vnd.renlearn.rlprint
+application/vnd.rn-realmedia rm
+application/vnd.route66.link66+xml link66
+application/vnd.ruckus.download
+application/vnd.s3sms
+application/vnd.sbm.mid2
+application/vnd.scribus
+application/vnd.sealed.3df
+application/vnd.sealed.csf
+application/vnd.sealed.doc
+application/vnd.sealed.eml
+application/vnd.sealed.mht
+application/vnd.sealed.net
+application/vnd.sealed.ppt
+application/vnd.sealed.tiff
+application/vnd.sealed.xls
+application/vnd.sealedmedia.softseal.html
+application/vnd.sealedmedia.softseal.pdf
+application/vnd.seemail see
+application/vnd.sema sema
+application/vnd.semd semd
+application/vnd.semf semf
+application/vnd.shana.informed.formdata ifm
+application/vnd.shana.informed.formtemplate itp
+application/vnd.shana.informed.interchange iif
+application/vnd.shana.informed.package ipk
+application/vnd.simtech-mindmapper twd twds
+application/vnd.smaf mmf
+application/vnd.software602.filler.form+xml
+application/vnd.software602.filler.form-xml-zip
+application/vnd.solent.sdkm+xml sdkm sdkd
+application/vnd.spotfire.dxp dxp
+application/vnd.spotfire.sfs sfs
+application/vnd.sss-cod
+application/vnd.sss-dtf
+application/vnd.sss-ntf
+application/vnd.street-stream
+application/vnd.sun.wadl+xml
+application/vnd.sus-calendar sus susp
+application/vnd.svd svd
+application/vnd.swiftview-ics
+application/vnd.syncml+xml xsm
+application/vnd.syncml.dm+wbxml bdm
+application/vnd.syncml.dm+xml xdm
+application/vnd.syncml.ds.notification
+application/vnd.tao.intent-module-archive tao
+application/vnd.tmobile-livetv tmo
+application/vnd.trid.tpt tpt
+application/vnd.triscape.mxs mxs
+application/vnd.trueapp tra
+application/vnd.truedoc
+application/vnd.ufdl ufd ufdl
+application/vnd.uiq.theme utz
+application/vnd.umajin umj
+application/vnd.unity unityweb
+application/vnd.uoml+xml uoml
+application/vnd.uplanet.alert
+application/vnd.uplanet.alert-wbxml
+application/vnd.uplanet.bearer-choice
+application/vnd.uplanet.bearer-choice-wbxml
+application/vnd.uplanet.cacheop
+application/vnd.uplanet.cacheop-wbxml
+application/vnd.uplanet.channel
+application/vnd.uplanet.channel-wbxml
+application/vnd.uplanet.list
+application/vnd.uplanet.list-wbxml
+application/vnd.uplanet.listcmd
+application/vnd.uplanet.listcmd-wbxml
+application/vnd.uplanet.signal
+application/vnd.vcx vcx
+application/vnd.vd-study
+application/vnd.vectorworks
+application/vnd.vidsoft.vidconference
+application/vnd.visio vsd vst vss vsw
+application/vnd.visionary vis
+application/vnd.vividence.scriptfile
+application/vnd.vsf vsf
+application/vnd.wap.sic
+application/vnd.wap.slc
+application/vnd.wap.wbxml wbxml
+application/vnd.wap.wmlc wmlc
+application/vnd.wap.wmlscriptc wmlsc
+application/vnd.webturbo wtb
+application/vnd.wfa.wsc
+application/vnd.wmc
+application/vnd.wmf.bootstrap
+application/vnd.wordperfect wpd
+application/vnd.wqd wqd
+application/vnd.wrq-hp3000-labelled
+application/vnd.wt.stf stf
+application/vnd.wv.csp+wbxml
+application/vnd.wv.csp+xml
+application/vnd.wv.ssp+xml
+application/vnd.xara xar
+application/vnd.xfdl xfdl
+application/vnd.xmi+xml
+application/vnd.xmpie.cpkg
+application/vnd.xmpie.dpkg
+application/vnd.xmpie.plan
+application/vnd.xmpie.ppkg
+application/vnd.xmpie.xlim
+application/vnd.yamaha.hv-dic hvd
+application/vnd.yamaha.hv-script hvs
+application/vnd.yamaha.hv-voice hvp
+application/vnd.yamaha.smaf-audio saf
+application/vnd.yamaha.smaf-phrase spf
+application/vnd.yellowriver-custom-menu cmp
+application/vnd.zzazz.deck+xml zaz
+application/voicexml+xml vxml
+application/watcherinfo+xml
+application/whoispp-query
+application/whoispp-response
+application/winhlp hlp
+application/wita
+application/wordperfect5.1
+application/wsdl+xml wsdl
+application/wspolicy+xml wspolicy
+application/x-ace-compressed ace
+application/x-bcpio bcpio
+application/x-bittorrent torrent
+application/x-bzip bz
+application/x-bzip2 bz2 boz
+application/x-cdlink vcd
+application/x-chat chat
+application/x-chess-pgn pgn
+application/x-compress
+application/x-cpio cpio
+application/x-csh csh
+application/x-director dcr dir dxr fgd
+application/x-dvi dvi
+application/x-futuresplash spl
+application/x-gtar gtar
+application/x-gzip
+application/x-hdf hdf
+application/x-latex latex
+application/x-ms-wmd wmd
+application/x-ms-wmz wmz
+application/x-msaccess mdb
+application/x-msbinder obd
+application/x-mscardfile crd
+application/x-msclip clp
+application/x-msdownload exe dll com bat msi
+application/x-msmediaview mvb m13 m14
+application/x-msmetafile wmf
+application/x-msmoney mny
+application/x-mspublisher pub
+application/x-msschedule scd
+application/x-msterminal trm
+application/x-mswrite wri
+application/x-netcdf nc cdf
+application/x-pkcs12 p12 pfx
+application/x-pkcs7-certificates p7b spc
+application/x-pkcs7-certreqresp p7r
+application/x-rar-compressed rar
+application/x-sh sh
+application/x-shar shar
+application/x-shockwave-flash swf
+application/x-stuffit sit
+application/x-stuffitx sitx
+application/x-sv4cpio sv4cpio
+application/x-sv4crc sv4crc
+application/x-tar tar
+application/x-tcl tcl
+application/x-tex tex
+application/x-texinfo texinfo texi
+application/x-ustar ustar
+application/x-wais-source src
+application/x-x509-ca-cert der crt
+application/x400-bp
+application/xcap-att+xml
+application/xcap-caps+xml
+application/xcap-el+xml
+application/xcap-error+xml
+application/xcap-ns+xml
+application/xenc+xml xenc
+application/xhtml+xml xhtml xht
+application/xml xml xsl
+application/xml-dtd dtd
+application/xml-external-parsed-entity
+application/xmpp+xml
+application/xop+xml xop
+application/xslt+xml xslt
+application/xspf+xml xspf
+application/xv+xml mxml xhvml xvml xvm
+application/zip zip
+audio/32kadpcm
+audio/3gpp
+audio/3gpp2
+audio/ac3
+audio/amr
+audio/amr-wb
+audio/amr-wb+
+audio/asc
+audio/basic au snd
+audio/bv16
+audio/bv32
+audio/clearmode
+audio/cn
+audio/dat12
+audio/dls
+audio/dsr-es201108
+audio/dsr-es202050
+audio/dsr-es202211
+audio/dsr-es202212
+audio/dvi4
+audio/eac3
+audio/evrc
+audio/evrc-qcp
+audio/evrc0
+audio/evrc1
+audio/evrcb
+audio/evrcb0
+audio/evrcb1
+audio/evrcwb
+audio/evrcwb0
+audio/evrcwb1
+audio/g722
+audio/g7221
+audio/g723
+audio/g726-16
+audio/g726-24
+audio/g726-32
+audio/g726-40
+audio/g728
+audio/g729
+audio/g7291
+audio/g729d
+audio/g729e
+audio/gsm
+audio/gsm-efr
+audio/ilbc
+audio/l16
+audio/l20
+audio/l24
+audio/l8
+audio/lpc
+audio/midi mid midi kar rmi
+audio/mobile-xmf
+audio/mp4 mp4a
+audio/mp4a-latm
+audio/mpa
+audio/mpa-robust
+audio/mpeg mpga mp2 mp2a mp3 m2a m3a
+audio/mpeg4-generic
+audio/ogg oga ogg spx
+audio/parityfec
+audio/pcma
+audio/pcmu
+audio/prs.sid
+audio/qcelp
+audio/red
+audio/rtp-enc-aescm128
+audio/rtp-midi
+audio/rtx
+audio/smv
+audio/smv0
+audio/smv-qcp
+audio/sp-midi
+audio/t140c
+audio/t38
+audio/telephone-event
+audio/tone
+audio/ulpfec
+audio/vdvi
+audio/vmr-wb
+audio/vnd.3gpp.iufp
+audio/vnd.4sb
+audio/vnd.audiokoz
+audio/vnd.celp
+audio/vnd.cisco.nse
+audio/vnd.cmles.radio-events
+audio/vnd.cns.anp1
+audio/vnd.cns.inf1
+audio/vnd.digital-winds eol
+audio/vnd.dlna.adts
+audio/vnd.dolby.mlp
+audio/vnd.dts dts
+audio/vnd.dts.hd dtshd
+audio/vnd.everad.plj
+audio/vnd.hns.audio
+audio/vnd.lucent.voice lvp
+audio/vnd.ms-playready.media.pya pya
+audio/vnd.nokia.mobile-xmf
+audio/vnd.nortel.vbk
+audio/vnd.nuera.ecelp4800 ecelp4800
+audio/vnd.nuera.ecelp7470 ecelp7470
+audio/vnd.nuera.ecelp9600 ecelp9600
+audio/vnd.octel.sbc
+audio/vnd.qcelp
+audio/vnd.rhetorex.32kadpcm
+audio/vnd.sealedmedia.softseal.mpeg
+audio/vnd.vmx.cvsd
+audio/vorbis
+audio/vorbis-config
+audio/wav wav
+audio/x-aiff aif aiff aifc
+audio/x-mpegurl m3u
+audio/x-ms-wax wax
+audio/x-ms-wma wma
+audio/x-pn-realaudio ram ra
+audio/x-pn-realaudio-plugin rmp
+audio/x-wav wav
+chemical/x-cdx cdx
+chemical/x-cif cif
+chemical/x-cmdf cmdf
+chemical/x-cml cml
+chemical/x-csml csml
+chemical/x-pdb pdb
+chemical/x-xyz xyz
+image/bmp bmp
+image/cgm cgm
+image/fits
+image/g3fax g3
+image/gif gif
+image/ief ief
+image/jp2
+image/jpeg jpeg jpg jpe
+image/jpm
+image/jpx
+image/naplps
+image/png png
+image/prs.btif btif
+image/prs.pti
+image/svg+xml svg svgz
+image/t38
+image/tiff tiff tif
+image/tiff-fx
+image/vnd.adobe.photoshop psd
+image/vnd.cns.inf2
+image/vnd.djvu djvu djv
+image/vnd.dwg dwg
+image/vnd.dxf dxf
+image/vnd.fastbidsheet fbs
+image/vnd.fpx fpx
+image/vnd.fst fst
+image/vnd.fujixerox.edmics-mmr mmr
+image/vnd.fujixerox.edmics-rlc rlc
+image/vnd.globalgraphics.pgb
+image/vnd.microsoft.icon
+image/vnd.mix
+image/vnd.ms-modi mdi
+image/vnd.net-fpx npx
+image/vnd.sealed.png
+image/vnd.sealedmedia.softseal.gif
+image/vnd.sealedmedia.softseal.jpg
+image/vnd.svf
+image/vnd.wap.wbmp wbmp
+image/vnd.xiff xif
+image/x-cmu-raster ras
+image/x-cmx cmx
+image/x-icon ico
+image/x-pcx pcx
+image/x-pict pic pct
+image/x-portable-anymap pnm
+image/x-portable-bitmap pbm
+image/x-portable-graymap pgm
+image/x-portable-pixmap ppm
+image/x-rgb rgb
+image/x-xbitmap xbm
+image/x-xpixmap xpm
+image/x-xwindowdump xwd
+message/cpim
+message/delivery-status
+message/disposition-notification
+message/external-body
+message/global
+message/global-delivery-status
+message/global-disposition-notification
+message/global-headers
+message/http
+message/news
+message/partial
+message/rfc822 eml mime
+message/s-http
+message/sip
+message/sipfrag
+message/tracking-status
+message/vnd.si.simp
+model/iges igs iges
+model/mesh msh mesh silo
+model/vnd.dwf dwf
+model/vnd.flatland.3dml
+model/vnd.gdl gdl
+model/vnd.gs.gdl
+model/vnd.gtw gtw
+model/vnd.moml+xml
+model/vnd.mts mts
+model/vnd.parasolid.transmit.binary
+model/vnd.parasolid.transmit.text
+model/vnd.vtu vtu
+model/vrml wrl vrml
+multipart/alternative
+multipart/appledouble
+multipart/byteranges
+multipart/digest
+multipart/encrypted
+multipart/form-data
+multipart/header-set
+multipart/mixed
+multipart/parallel
+multipart/related
+multipart/report
+multipart/signed
+multipart/voice-message
+text/calendar ics ifb
+text/css css
+text/csv csv
+text/directory
+text/dns
+text/enriched
+text/html html htm
+text/parityfec
+text/plain txt text conf def list log in
+text/prs.fallenstein.rst
+text/prs.lines.tag dsc
+text/red
+text/rfc822-headers
+text/richtext rtx
+text/rtf
+text/rtp-enc-aescm128
+text/rtx
+text/sgml sgml sgm
+text/t140
+text/tab-separated-values tsv
+text/troff t tr roff man me ms
+text/ulpfec
+text/uri-list uri uris urls
+text/vnd.abc
+text/vnd.curl
+text/vnd.dmclientscript
+text/vnd.esmertec.theme-descriptor
+text/vnd.fly fly
+text/vnd.fmi.flexstor flx
+text/vnd.graphviz gv
+text/vnd.in3d.3dml 3dml
+text/vnd.in3d.spot spot
+text/vnd.iptc.newsml
+text/vnd.iptc.nitf
+text/vnd.latex-z
+text/vnd.motorola.reflex
+text/vnd.ms-mediapackage
+text/vnd.net2phone.commcenter.command
+text/vnd.si.uricatalogue
+text/vnd.sun.j2me.app-descriptor jad
+text/vnd.trolltech.linguist
+text/vnd.wap.si
+text/vnd.wap.sl
+text/vnd.wap.wml wml
+text/vnd.wap.wmlscript wmls
+text/x-asm s asm
+text/x-c c cc cxx cpp h hh dic
+text/x-fortran f for f77 f90
+text/x-pascal p pas
+text/x-java-source java
+text/x-setext etx
+text/x-uuencode uu
+text/x-vcalendar vcs
+text/x-vcard vcf
+text/xml
+text/xml-external-parsed-entity
+video/3gpp 3gp
+video/3gpp-tt
+video/3gpp2 3g2
+video/bmpeg
+video/bt656
+video/celb
+video/dv
+video/h261 h261
+video/h263 h263
+video/h263-1998
+video/h263-2000
+video/h264 h264
+video/jpeg jpgv
+video/jpeg2000
+video/jpm jpm jpgm
+video/mj2 mj2 mjp2
+video/mp1s
+video/mp2p
+video/mp2t
+video/mp4 mp4 mp4v mpg4
+video/mp4v-es
+video/mpeg mpeg mpg mpe m1v m2v
+video/mpeg4-generic
+video/mpv
+video/nv
+video/ogg ogv
+video/parityfec
+video/pointer
+video/quicktime qt mov
+video/raw
+video/rtp-enc-aescm128
+video/rtx
+video/smpte292m
+video/ulpfec
+video/vc1
+video/vnd.cctv
+video/vnd.dlna.mpeg-tts
+video/vnd.fvt fvt
+video/vnd.hns.video
+video/vnd.iptvforum.1dparityfec-1010
+video/vnd.iptvforum.1dparityfec-2005
+video/vnd.iptvforum.2dparityfec-1010
+video/vnd.iptvforum.2dparityfec-2005
+video/vnd.iptvforum.ttsavc
+video/vnd.iptvforum.ttsmpeg2
+video/vnd.motorola.video
+video/vnd.motorola.videop
+video/vnd.mpegurl mxu m4u
+video/vnd.ms-playready.media.pyv pyv
+video/vnd.nokia.interleaved-multimedia
+video/vnd.nokia.videovoip
+video/vnd.objectvideo
+video/vnd.sealed.mpeg1
+video/vnd.sealed.mpeg4
+video/vnd.sealed.swf
+video/vnd.sealedmedia.softseal.mov
+video/vnd.vivo viv
+video/x-fli fli
+video/x-ms-asf asf asx
+video/x-ms-wm wm
+video/x-ms-wmv wmv
+video/x-ms-wmx wmx
+video/x-ms-wvx wvx
+video/x-msvideo avi
+video/x-sgi-movie movie
+x-conference/x-cooltalk ice
--- /dev/null
+package Net::HTTP;
+
+use strict;
+use vars qw($VERSION @ISA $SOCKET_CLASS);
+
+$VERSION = "5.834";
+unless ($SOCKET_CLASS) {
+ eval { require IO::Socket::INET } || require IO::Socket;
+ $SOCKET_CLASS = "IO::Socket::INET";
+}
+require Net::HTTP::Methods;
+require Carp;
+
+@ISA = ($SOCKET_CLASS, 'Net::HTTP::Methods');
+
+sub new {
+ my $class = shift;
+ Carp::croak("No Host option provided") unless @_;
+ $class->SUPER::new(@_);
+}
+
+sub configure {
+ my($self, $cnf) = @_;
+ $self->http_configure($cnf);
+}
+
+sub http_connect {
+ my($self, $cnf) = @_;
+ $self->SUPER::configure($cnf);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Net::HTTP - Low-level HTTP connection (client)
+
+=head1 SYNOPSIS
+
+ use Net::HTTP;
+ my $s = Net::HTTP->new(Host => "www.perl.com") || die $@;
+ $s->write_request(GET => "/", 'User-Agent' => "Mozilla/5.0");
+ my($code, $mess, %h) = $s->read_response_headers;
+
+ while (1) {
+ my $buf;
+ my $n = $s->read_entity_body($buf, 1024);
+ die "read failed: $!" unless defined $n;
+ last unless $n;
+ print $buf;
+ }
+
+=head1 DESCRIPTION
+
+The C<Net::HTTP> class is a low-level HTTP client. An instance of the
+C<Net::HTTP> class represents a connection to an HTTP server. The
+HTTP protocol is described in RFC 2616. The C<Net::HTTP> class
+supports C<HTTP/1.0> and C<HTTP/1.1>.
+
+C<Net::HTTP> is a sub-class of C<IO::Socket::INET>. You can mix the
+methods described below with reading and writing from the socket
+directly. This is not necessary a good idea, unless you know what you
+are doing.
+
+The following methods are provided (in addition to those of
+C<IO::Socket::INET>):
+
+=over
+
+=item $s = Net::HTTP->new( %options )
+
+The C<Net::HTTP> constructor method takes the same options as
+C<IO::Socket::INET>'s as well as these:
+
+ Host: Initial host attribute value
+ KeepAlive: Initial keep_alive attribute value
+ SendTE: Initial send_te attribute_value
+ HTTPVersion: Initial http_version attribute value
+ PeerHTTPVersion: Initial peer_http_version attribute value
+ MaxLineLength: Initial max_line_length attribute value
+ MaxHeaderLines: Initial max_header_lines attribute value
+
+The C<Host> option is also the default for C<IO::Socket::INET>'s
+C<PeerAddr>. The C<PeerPort> defaults to 80 if not provided.
+
+The C<Listen> option provided by C<IO::Socket::INET>'s constructor
+method is not allowed.
+
+If unable to connect to the given HTTP server then the constructor
+returns C<undef> and $@ contains the reason. After a successful
+connect, a C<Net:HTTP> object is returned.
+
+=item $s->host
+
+Get/set the default value of the C<Host> header to send. The $host
+must not be set to an empty string (or C<undef>) for HTTP/1.1.
+
+=item $s->keep_alive
+
+Get/set the I<keep-alive> value. If this value is TRUE then the
+request will be sent with headers indicating that the server should try
+to keep the connection open so that multiple requests can be sent.
+
+The actual headers set will depend on the value of the C<http_version>
+and C<peer_http_version> attributes.
+
+=item $s->send_te
+
+Get/set the a value indicating if the request will be sent with a "TE"
+header to indicate the transfer encodings that the server can choose to
+use. The list of encodings announced as accepted by this client depends
+on availability of the following modules: C<Compress::Raw::Zlib> for
+I<deflate>, and C<IO::Compress::Gunzip> for I<gzip>.
+
+=item $s->http_version
+
+Get/set the HTTP version number that this client should announce.
+This value can only be set to "1.0" or "1.1". The default is "1.1".
+
+=item $s->peer_http_version
+
+Get/set the protocol version number of our peer. This value will
+initially be "1.0", but will be updated by a successful
+read_response_headers() method call.
+
+=item $s->max_line_length
+
+Get/set a limit on the length of response line and response header
+lines. The default is 8192. A value of 0 means no limit.
+
+=item $s->max_header_length
+
+Get/set a limit on the number of header lines that a response can
+have. The default is 128. A value of 0 means no limit.
+
+=item $s->format_request($method, $uri, %headers, [$content])
+
+Format a request message and return it as a string. If the headers do
+not include a C<Host> header, then a header is inserted with the value
+of the C<host> attribute. Headers like C<Connection> and
+C<Keep-Alive> might also be added depending on the status of the
+C<keep_alive> attribute.
+
+If $content is given (and it is non-empty), then a C<Content-Length>
+header is automatically added unless it was already present.
+
+=item $s->write_request($method, $uri, %headers, [$content])
+
+Format and send a request message. Arguments are the same as for
+format_request(). Returns true if successful.
+
+=item $s->format_chunk( $data )
+
+Returns the string to be written for the given chunk of data.
+
+=item $s->write_chunk($data)
+
+Will write a new chunk of request entity body data. This method
+should only be used if the C<Transfer-Encoding> header with a value of
+C<chunked> was sent in the request. Note, writing zero-length data is
+a no-op. Use the write_chunk_eof() method to signal end of entity
+body data.
+
+Returns true if successful.
+
+=item $s->format_chunk_eof( %trailers )
+
+Returns the string to be written for signaling EOF when a
+C<Transfer-Encoding> of C<chunked> is used.
+
+=item $s->write_chunk_eof( %trailers )
+
+Will write eof marker for chunked data and optional trailers. Note
+that trailers should not really be used unless is was signaled
+with a C<Trailer> header.
+
+Returns true if successful.
+
+=item ($code, $mess, %headers) = $s->read_response_headers( %opts )
+
+Read response headers from server and return it. The $code is the 3
+digit HTTP status code (see L<HTTP::Status>) and $mess is the textual
+message that came with it. Headers are then returned as key/value
+pairs. Since key letter casing is not normalized and the same key can
+even occur multiple times, assigning these values directly to a hash
+is not wise. Only the $code is returned if this method is called in
+scalar context.
+
+As a side effect this method updates the 'peer_http_version'
+attribute.
+
+Options might be passed in as key/value pairs. There are currently
+only two options supported; C<laxed> and C<junk_out>.
+
+The C<laxed> option will make read_response_headers() more forgiving
+towards servers that have not learned how to speak HTTP properly. The
+C<laxed> option is a boolean flag, and is enabled by passing in a TRUE
+value. The C<junk_out> option can be used to capture bad header lines
+when C<laxed> is enabled. The value should be an array reference.
+Bad header lines will be pushed onto the array.
+
+The C<laxed> option must be specified in order to communicate with
+pre-HTTP/1.0 servers that don't describe the response outcome or the
+data they send back with a header block. For these servers
+peer_http_version is set to "0.9" and this method returns (200,
+"Assumed OK").
+
+The method will raise an exception (die) if the server does not speak
+proper HTTP or if the C<max_line_length> or C<max_header_length>
+limits are reached. If the C<laxed> option is turned on and
+C<max_line_length> and C<max_header_length> checks are turned off,
+then no exception will be raised and this method will always
+return a response code.
+
+=item $n = $s->read_entity_body($buf, $size);
+
+Reads chunks of the entity body content. Basically the same interface
+as for read() and sysread(), but the buffer offset argument is not
+supported yet. This method should only be called after a successful
+read_response_headers() call.
+
+The return value will be C<undef> on read errors, 0 on EOF, -1 if no data
+could be returned this time, otherwise the number of bytes assigned
+to $buf. The $buf is set to "" when the return value is -1.
+
+You normally want to retry this call if this function returns either
+-1 or C<undef> with C<$!> as EINTR or EAGAIN (see L<Errno>). EINTR
+can happen if the application catches signals and EAGAIN can happen if
+you made the socket non-blocking.
+
+This method will raise exceptions (die) if the server does not speak
+proper HTTP. This can only happen when reading chunked data.
+
+=item %headers = $s->get_trailers
+
+After read_entity_body() has returned 0 to indicate end of the entity
+body, you might call this method to pick up any trailers.
+
+=item $s->_rbuf
+
+Get/set the read buffer content. The read_response_headers() and
+read_entity_body() methods use an internal buffer which they will look
+for data before they actually sysread more from the socket itself. If
+they read too much, the remaining data will be left in this buffer.
+
+=item $s->_rbuf_length
+
+Returns the number of bytes in the read buffer. This should always be
+the same as:
+
+ length($s->_rbuf)
+
+but might be more efficient.
+
+=back
+
+=head1 SUBCLASSING
+
+The read_response_headers() and read_entity_body() will invoke the
+sysread() method when they need more data. Subclasses might want to
+override this method to control how reading takes place.
+
+The object itself is a glob. Subclasses should avoid using hash key
+names prefixed with C<http_> and C<io_>.
+
+=head1 SEE ALSO
+
+L<LWP>, L<IO::Socket::INET>, L<Net::HTTP::NB>
+
+=head1 COPYRIGHT
+
+Copyright 2001-2003 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
--- /dev/null
+package Net::HTTP::Methods;
+
+require 5.005; # 4-arg substr
+
+use strict;
+use vars qw($VERSION);
+
+$VERSION = "5.834";
+
+my $CRLF = "\015\012"; # "\r\n" is not portable
+
+*_bytes = defined(&utf8::downgrade) ?
+ sub {
+ unless (utf8::downgrade($_[0], 1)) {
+ require Carp;
+ Carp::croak("Wide character in HTTP request (bytes required)");
+ }
+ return $_[0];
+ }
+ :
+ sub {
+ return $_[0];
+ };
+
+
+sub new {
+ my $class = shift;
+ unshift(@_, "Host") if @_ == 1;
+ my %cnf = @_;
+ require Symbol;
+ my $self = bless Symbol::gensym(), $class;
+ return $self->http_configure(\%cnf);
+}
+
+sub http_configure {
+ my($self, $cnf) = @_;
+
+ die "Listen option not allowed" if $cnf->{Listen};
+ my $explict_host = (exists $cnf->{Host});
+ my $host = delete $cnf->{Host};
+ my $peer = $cnf->{PeerAddr} || $cnf->{PeerHost};
+ if (!$peer) {
+ die "No Host option provided" unless $host;
+ $cnf->{PeerAddr} = $peer = $host;
+ }
+
+ if ($peer =~ s,:(\d+)$,,) {
+ $cnf->{PeerPort} = int($1); # always override
+ }
+ if (!$cnf->{PeerPort}) {
+ $cnf->{PeerPort} = $self->http_default_port;
+ }
+
+ if (!$explict_host) {
+ $host = $peer;
+ $host =~ s/:.*//;
+ }
+ if ($host && $host !~ /:/) {
+ my $p = $cnf->{PeerPort};
+ $host .= ":$p" if $p != $self->http_default_port;
+ }
+
+ $cnf->{Proto} = 'tcp';
+
+ my $keep_alive = delete $cnf->{KeepAlive};
+ my $http_version = delete $cnf->{HTTPVersion};
+ $http_version = "1.1" unless defined $http_version;
+ my $peer_http_version = delete $cnf->{PeerHTTPVersion};
+ $peer_http_version = "1.0" unless defined $peer_http_version;
+ my $send_te = delete $cnf->{SendTE};
+ my $max_line_length = delete $cnf->{MaxLineLength};
+ $max_line_length = 8*1024 unless defined $max_line_length;
+ my $max_header_lines = delete $cnf->{MaxHeaderLines};
+ $max_header_lines = 128 unless defined $max_header_lines;
+
+ return undef unless $self->http_connect($cnf);
+
+ $self->host($host);
+ $self->keep_alive($keep_alive);
+ $self->send_te($send_te);
+ $self->http_version($http_version);
+ $self->peer_http_version($peer_http_version);
+ $self->max_line_length($max_line_length);
+ $self->max_header_lines($max_header_lines);
+
+ ${*$self}{'http_buf'} = "";
+
+ return $self;
+}
+
+sub http_default_port {
+ 80;
+}
+
+# set up property accessors
+for my $method (qw(host keep_alive send_te max_line_length max_header_lines peer_http_version)) {
+ my $prop_name = "http_" . $method;
+ no strict 'refs';
+ *$method = sub {
+ my $self = shift;
+ my $old = ${*$self}{$prop_name};
+ ${*$self}{$prop_name} = shift if @_;
+ return $old;
+ };
+}
+
+# we want this one to be a bit smarter
+sub http_version {
+ my $self = shift;
+ my $old = ${*$self}{'http_version'};
+ if (@_) {
+ my $v = shift;
+ $v = "1.0" if $v eq "1"; # float
+ unless ($v eq "1.0" or $v eq "1.1") {
+ require Carp;
+ Carp::croak("Unsupported HTTP version '$v'");
+ }
+ ${*$self}{'http_version'} = $v;
+ }
+ $old;
+}
+
+sub format_request {
+ my $self = shift;
+ my $method = shift;
+ my $uri = shift;
+
+ my $content = (@_ % 2) ? pop : "";
+
+ for ($method, $uri) {
+ require Carp;
+ Carp::croak("Bad method or uri") if /\s/ || !length;
+ }
+
+ push(@{${*$self}{'http_request_method'}}, $method);
+ my $ver = ${*$self}{'http_version'};
+ my $peer_ver = ${*$self}{'http_peer_http_version'} || "1.0";
+
+ my @h;
+ my @connection;
+ my %given = (host => 0, "content-length" => 0, "te" => 0);
+ while (@_) {
+ my($k, $v) = splice(@_, 0, 2);
+ my $lc_k = lc($k);
+ if ($lc_k eq "connection") {
+ $v =~ s/^\s+//;
+ $v =~ s/\s+$//;
+ push(@connection, split(/\s*,\s*/, $v));
+ next;
+ }
+ if (exists $given{$lc_k}) {
+ $given{$lc_k}++;
+ }
+ push(@h, "$k: $v");
+ }
+
+ if (length($content) && !$given{'content-length'}) {
+ push(@h, "Content-Length: " . length($content));
+ }
+
+ my @h2;
+ if ($given{te}) {
+ push(@connection, "TE") unless grep lc($_) eq "te", @connection;
+ }
+ elsif ($self->send_te && gunzip_ok()) {
+ # gzip is less wanted since the IO::Uncompress::Gunzip interface for
+ # it does not really allow chunked decoding to take place easily.
+ push(@h2, "TE: deflate,gzip;q=0.3");
+ push(@connection, "TE");
+ }
+
+ unless (grep lc($_) eq "close", @connection) {
+ if ($self->keep_alive) {
+ if ($peer_ver eq "1.0") {
+ # from looking at Netscape's headers
+ push(@h2, "Keep-Alive: 300");
+ unshift(@connection, "Keep-Alive");
+ }
+ }
+ else {
+ push(@connection, "close") if $ver ge "1.1";
+ }
+ }
+ push(@h2, "Connection: " . join(", ", @connection)) if @connection;
+ unless ($given{host}) {
+ my $h = ${*$self}{'http_host'};
+ push(@h2, "Host: $h") if $h;
+ }
+
+ return _bytes(join($CRLF, "$method $uri HTTP/$ver", @h2, @h, "", $content));
+}
+
+
+sub write_request {
+ my $self = shift;
+ $self->print($self->format_request(@_));
+}
+
+sub format_chunk {
+ my $self = shift;
+ return $_[0] unless defined($_[0]) && length($_[0]);
+ return _bytes(sprintf("%x", length($_[0])) . $CRLF . $_[0] . $CRLF);
+}
+
+sub write_chunk {
+ my $self = shift;
+ return 1 unless defined($_[0]) && length($_[0]);
+ $self->print(_bytes(sprintf("%x", length($_[0])) . $CRLF . $_[0] . $CRLF));
+}
+
+sub format_chunk_eof {
+ my $self = shift;
+ my @h;
+ while (@_) {
+ push(@h, sprintf "%s: %s$CRLF", splice(@_, 0, 2));
+ }
+ return _bytes(join("", "0$CRLF", @h, $CRLF));
+}
+
+sub write_chunk_eof {
+ my $self = shift;
+ $self->print($self->format_chunk_eof(@_));
+}
+
+
+sub my_read {
+ die if @_ > 3;
+ my $self = shift;
+ my $len = $_[1];
+ for (${*$self}{'http_buf'}) {
+ if (length) {
+ $_[0] = substr($_, 0, $len, "");
+ return length($_[0]);
+ }
+ else {
+ return $self->sysread($_[0], $len);
+ }
+ }
+}
+
+
+sub my_readline {
+ my $self = shift;
+ my $what = shift;
+ for (${*$self}{'http_buf'}) {
+ my $max_line_length = ${*$self}{'http_max_line_length'};
+ my $pos;
+ while (1) {
+ # find line ending
+ $pos = index($_, "\012");
+ last if $pos >= 0;
+ die "$what line too long (limit is $max_line_length)"
+ if $max_line_length && length($_) > $max_line_length;
+
+ # need to read more data to find a line ending
+ READ:
+ {
+ my $n = $self->sysread($_, 1024, length);
+ unless (defined $n) {
+ redo READ if $!{EINTR};
+ if ($!{EAGAIN}) {
+ # Hmm, we must be reading from a non-blocking socket
+ # XXX Should really wait until this socket is readable,...
+ select(undef, undef, undef, 0.1); # but this will do for now
+ redo READ;
+ }
+ # if we have already accumulated some data let's at least
+ # return that as a line
+ die "$what read failed: $!" unless length;
+ }
+ unless ($n) {
+ return undef unless length;
+ return substr($_, 0, length, "");
+ }
+ }
+ }
+ die "$what line too long ($pos; limit is $max_line_length)"
+ if $max_line_length && $pos > $max_line_length;
+
+ my $line = substr($_, 0, $pos+1, "");
+ $line =~ s/(\015?\012)\z// || die "Assert";
+ return wantarray ? ($line, $1) : $line;
+ }
+}
+
+
+sub _rbuf {
+ my $self = shift;
+ if (@_) {
+ for (${*$self}{'http_buf'}) {
+ my $old;
+ $old = $_ if defined wantarray;
+ $_ = shift;
+ return $old;
+ }
+ }
+ else {
+ return ${*$self}{'http_buf'};
+ }
+}
+
+sub _rbuf_length {
+ my $self = shift;
+ return length ${*$self}{'http_buf'};
+}
+
+
+sub _read_header_lines {
+ my $self = shift;
+ my $junk_out = shift;
+
+ my @headers;
+ my $line_count = 0;
+ my $max_header_lines = ${*$self}{'http_max_header_lines'};
+ while (my $line = my_readline($self, 'Header')) {
+ if ($line =~ /^(\S+?)\s*:\s*(.*)/s) {
+ push(@headers, $1, $2);
+ }
+ elsif (@headers && $line =~ s/^\s+//) {
+ $headers[-1] .= " " . $line;
+ }
+ elsif ($junk_out) {
+ push(@$junk_out, $line);
+ }
+ else {
+ die "Bad header: '$line'\n";
+ }
+ if ($max_header_lines) {
+ $line_count++;
+ if ($line_count >= $max_header_lines) {
+ die "Too many header lines (limit is $max_header_lines)";
+ }
+ }
+ }
+ return @headers;
+}
+
+
+sub read_response_headers {
+ my($self, %opt) = @_;
+ my $laxed = $opt{laxed};
+
+ my($status, $eol) = my_readline($self, 'Status');
+ unless (defined $status) {
+ die "Server closed connection without sending any data back";
+ }
+
+ my($peer_ver, $code, $message) = split(/\s+/, $status, 3);
+ if (!$peer_ver || $peer_ver !~ s,^HTTP/,, || $code !~ /^[1-5]\d\d$/) {
+ die "Bad response status line: '$status'" unless $laxed;
+ # assume HTTP/0.9
+ ${*$self}{'http_peer_http_version'} = "0.9";
+ ${*$self}{'http_status'} = "200";
+ substr(${*$self}{'http_buf'}, 0, 0) = $status . ($eol || "");
+ return 200 unless wantarray;
+ return (200, "Assumed OK");
+ };
+
+ ${*$self}{'http_peer_http_version'} = $peer_ver;
+ ${*$self}{'http_status'} = $code;
+
+ my $junk_out;
+ if ($laxed) {
+ $junk_out = $opt{junk_out} || [];
+ }
+ my @headers = $self->_read_header_lines($junk_out);
+
+ # pick out headers that read_entity_body might need
+ my @te;
+ my $content_length;
+ for (my $i = 0; $i < @headers; $i += 2) {
+ my $h = lc($headers[$i]);
+ if ($h eq 'transfer-encoding') {
+ my $te = $headers[$i+1];
+ $te =~ s/^\s+//;
+ $te =~ s/\s+$//;
+ push(@te, $te) if length($te);
+ }
+ elsif ($h eq 'content-length') {
+ # ignore bogus and overflow values
+ if ($headers[$i+1] =~ /^\s*(\d{1,15})(?:\s|$)/) {
+ $content_length = $1;
+ }
+ }
+ }
+ ${*$self}{'http_te'} = join(",", @te);
+ ${*$self}{'http_content_length'} = $content_length;
+ ${*$self}{'http_first_body'}++;
+ delete ${*$self}{'http_trailers'};
+ return $code unless wantarray;
+ return ($code, $message, @headers);
+}
+
+
+sub read_entity_body {
+ my $self = shift;
+ my $buf_ref = \$_[0];
+ my $size = $_[1];
+ die "Offset not supported yet" if $_[2];
+
+ my $chunked;
+ my $bytes;
+
+ if (${*$self}{'http_first_body'}) {
+ ${*$self}{'http_first_body'} = 0;
+ delete ${*$self}{'http_chunked'};
+ delete ${*$self}{'http_bytes'};
+ my $method = shift(@{${*$self}{'http_request_method'}});
+ my $status = ${*$self}{'http_status'};
+ if ($method eq "HEAD") {
+ # this response is always empty regardless of other headers
+ $bytes = 0;
+ }
+ elsif (my $te = ${*$self}{'http_te'}) {
+ my @te = split(/\s*,\s*/, lc($te));
+ die "Chunked must be last Transfer-Encoding '$te'"
+ unless pop(@te) eq "chunked";
+
+ for (@te) {
+ if ($_ eq "deflate" && inflate_ok()) {
+ #require Compress::Raw::Zlib;
+ my ($i, $status) = Compress::Raw::Zlib::Inflate->new();
+ die "Can't make inflator: $status" unless $i;
+ $_ = sub { my $out; $i->inflate($_[0], \$out); $out }
+ }
+ elsif ($_ eq "gzip" && gunzip_ok()) {
+ #require IO::Uncompress::Gunzip;
+ my @buf;
+ $_ = sub {
+ push(@buf, $_[0]);
+ return "" unless $_[1];
+ my $input = join("", @buf);
+ my $output;
+ IO::Uncompress::Gunzip::gunzip(\$input, \$output, Transparent => 0)
+ or die "Can't gunzip content: $IO::Uncompress::Gunzip::GunzipError";
+ return \$output;
+ };
+ }
+ elsif ($_ eq "identity") {
+ $_ = sub { $_[0] };
+ }
+ else {
+ die "Can't handle transfer encoding '$te'";
+ }
+ }
+
+ @te = reverse(@te);
+
+ ${*$self}{'http_te2'} = @te ? \@te : "";
+ $chunked = -1;
+ }
+ elsif (defined(my $content_length = ${*$self}{'http_content_length'})) {
+ $bytes = $content_length;
+ }
+ elsif ($status =~ /^(?:1|[23]04)/) {
+ # RFC 2616 says that these responses should always be empty
+ # but that does not appear to be true in practice [RT#17907]
+ $bytes = 0;
+ }
+ else {
+ # XXX Multi-Part types are self delimiting, but RFC 2616 says we
+ # only has to deal with 'multipart/byteranges'
+
+ # Read until EOF
+ }
+ }
+ else {
+ $chunked = ${*$self}{'http_chunked'};
+ $bytes = ${*$self}{'http_bytes'};
+ }
+
+ if (defined $chunked) {
+ # The state encoded in $chunked is:
+ # $chunked == 0: read CRLF after chunk, then chunk header
+ # $chunked == -1: read chunk header
+ # $chunked > 0: bytes left in current chunk to read
+
+ if ($chunked <= 0) {
+ my $line = my_readline($self, 'Entity body');
+ if ($chunked == 0) {
+ die "Missing newline after chunk data: '$line'"
+ if !defined($line) || $line ne "";
+ $line = my_readline($self, 'Entity body');
+ }
+ die "EOF when chunk header expected" unless defined($line);
+ my $chunk_len = $line;
+ $chunk_len =~ s/;.*//; # ignore potential chunk parameters
+ unless ($chunk_len =~ /^([\da-fA-F]+)\s*$/) {
+ die "Bad chunk-size in HTTP response: $line";
+ }
+ $chunked = hex($1);
+ if ($chunked == 0) {
+ ${*$self}{'http_trailers'} = [$self->_read_header_lines];
+ $$buf_ref = "";
+
+ my $n = 0;
+ if (my $transforms = delete ${*$self}{'http_te2'}) {
+ for (@$transforms) {
+ $$buf_ref = &$_($$buf_ref, 1);
+ }
+ $n = length($$buf_ref);
+ }
+
+ # in case somebody tries to read more, make sure we continue
+ # to return EOF
+ delete ${*$self}{'http_chunked'};
+ ${*$self}{'http_bytes'} = 0;
+
+ return $n;
+ }
+ }
+
+ my $n = $chunked;
+ $n = $size if $size && $size < $n;
+ $n = my_read($self, $$buf_ref, $n);
+ return undef unless defined $n;
+
+ ${*$self}{'http_chunked'} = $chunked - $n;
+
+ if ($n > 0) {
+ if (my $transforms = ${*$self}{'http_te2'}) {
+ for (@$transforms) {
+ $$buf_ref = &$_($$buf_ref, 0);
+ }
+ $n = length($$buf_ref);
+ $n = -1 if $n == 0;
+ }
+ }
+ return $n;
+ }
+ elsif (defined $bytes) {
+ unless ($bytes) {
+ $$buf_ref = "";
+ return 0;
+ }
+ my $n = $bytes;
+ $n = $size if $size && $size < $n;
+ $n = my_read($self, $$buf_ref, $n);
+ return undef unless defined $n;
+ ${*$self}{'http_bytes'} = $bytes - $n;
+ return $n;
+ }
+ else {
+ # read until eof
+ $size ||= 8*1024;
+ return my_read($self, $$buf_ref, $size);
+ }
+}
+
+sub get_trailers {
+ my $self = shift;
+ @{${*$self}{'http_trailers'} || []};
+}
+
+BEGIN {
+my $gunzip_ok;
+my $inflate_ok;
+
+sub gunzip_ok {
+ return $gunzip_ok if defined $gunzip_ok;
+
+ # Try to load IO::Uncompress::Gunzip.
+ local $@;
+ local $SIG{__DIE__};
+ $gunzip_ok = 0;
+
+ eval {
+ require IO::Uncompress::Gunzip;
+ $gunzip_ok++;
+ };
+
+ return $gunzip_ok;
+}
+
+sub inflate_ok {
+ return $inflate_ok if defined $inflate_ok;
+
+ # Try to load Compress::Raw::Zlib.
+ local $@;
+ local $SIG{__DIE__};
+ $inflate_ok = 0;
+
+ eval {
+ require Compress::Raw::Zlib;
+ $inflate_ok++;
+ };
+
+ return $inflate_ok;
+}
+
+} # BEGIN
+
+1;
--- /dev/null
+package Net::HTTP::NB;
+
+use strict;
+use vars qw($VERSION @ISA);
+
+$VERSION = "5.810";
+
+require Net::HTTP;
+@ISA=qw(Net::HTTP);
+
+sub sysread {
+ my $self = $_[0];
+ if (${*$self}{'httpnb_read_count'}++) {
+ ${*$self}{'http_buf'} = ${*$self}{'httpnb_save'};
+ die "Multi-read\n";
+ }
+ my $buf;
+ my $offset = $_[3] || 0;
+ my $n = sysread($self, $_[1], $_[2], $offset);
+ ${*$self}{'httpnb_save'} .= substr($_[1], $offset);
+ return $n;
+}
+
+sub read_response_headers {
+ my $self = shift;
+ ${*$self}{'httpnb_read_count'} = 0;
+ ${*$self}{'httpnb_save'} = ${*$self}{'http_buf'};
+ my @h = eval { $self->SUPER::read_response_headers(@_) };
+ if ($@) {
+ return if $@ eq "Multi-read\n";
+ die;
+ }
+ return @h;
+}
+
+sub read_entity_body {
+ my $self = shift;
+ ${*$self}{'httpnb_read_count'} = 0;
+ ${*$self}{'httpnb_save'} = ${*$self}{'http_buf'};
+ # XXX I'm not so sure this does the correct thing in case of
+ # transfer-encoding tranforms
+ my $n = eval { $self->SUPER::read_entity_body(@_); };
+ if ($@) {
+ $_[0] = "";
+ return -1;
+ }
+ return $n;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Net::HTTP::NB - Non-blocking HTTP client
+
+=head1 SYNOPSIS
+
+ use Net::HTTP::NB;
+ my $s = Net::HTTP::NB->new(Host => "www.perl.com") || die $@;
+ $s->write_request(GET => "/");
+
+ use IO::Select;
+ my $sel = IO::Select->new($s);
+
+ READ_HEADER: {
+ die "Header timeout" unless $sel->can_read(10);
+ my($code, $mess, %h) = $s->read_response_headers;
+ redo READ_HEADER unless $code;
+ }
+
+ while (1) {
+ die "Body timeout" unless $sel->can_read(10);
+ my $buf;
+ my $n = $s->read_entity_body($buf, 1024);
+ last unless $n;
+ print $buf;
+ }
+
+=head1 DESCRIPTION
+
+Same interface as C<Net::HTTP> but it will never try multiple reads
+when the read_response_headers() or read_entity_body() methods are
+invoked. This make it possible to multiplex multiple Net::HTTP::NB
+using select without risk blocking.
+
+If read_response_headers() did not see enough data to complete the
+headers an empty list is returned.
+
+If read_entity_body() did not see new entity data in its read
+the value -1 is returned.
+
+=head1 SEE ALSO
+
+L<Net::HTTP>
+
+=head1 COPYRIGHT
+
+Copyright 2001 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
--- /dev/null
+package Net::HTTPS;
+
+use strict;
+use vars qw($VERSION $SSL_SOCKET_CLASS @ISA);
+
+$VERSION = "5.819";
+
+# Figure out which SSL implementation to use
+if ($SSL_SOCKET_CLASS) {
+ # somebody already set it
+}
+elsif ($Net::SSL::VERSION) {
+ $SSL_SOCKET_CLASS = "Net::SSL";
+}
+elsif ($IO::Socket::SSL::VERSION) {
+ $SSL_SOCKET_CLASS = "IO::Socket::SSL"; # it was already loaded
+}
+else {
+ eval { require Net::SSL; }; # from Crypt-SSLeay
+ if ($@) {
+ my $old_errsv = $@;
+ eval {
+ require IO::Socket::SSL;
+ };
+ if ($@) {
+ $old_errsv =~ s/\s\(\@INC contains:.*\)/)/g;
+ die $old_errsv . $@;
+ }
+ $SSL_SOCKET_CLASS = "IO::Socket::SSL";
+ }
+ else {
+ $SSL_SOCKET_CLASS = "Net::SSL";
+ }
+}
+
+require Net::HTTP::Methods;
+
+@ISA=($SSL_SOCKET_CLASS, 'Net::HTTP::Methods');
+
+sub configure {
+ my($self, $cnf) = @_;
+ $self->http_configure($cnf);
+}
+
+sub http_connect {
+ my($self, $cnf) = @_;
+ $self->SUPER::configure($cnf);
+}
+
+sub http_default_port {
+ 443;
+}
+
+# The underlying SSLeay classes fails to work if the socket is
+# placed in non-blocking mode. This override of the blocking
+# method makes sure it stays the way it was created.
+sub blocking { } # noop
+
+1;
--- /dev/null
+package WWW::RobotRules;
+
+$VERSION = "5.832";
+sub Version { $VERSION; }
+
+use strict;
+use URI ();
+
+
+
+sub new {
+ my($class, $ua) = @_;
+
+ # This ugly hack is needed to ensure backwards compatibility.
+ # The "WWW::RobotRules" class is now really abstract.
+ $class = "WWW::RobotRules::InCore" if $class eq "WWW::RobotRules";
+
+ my $self = bless { }, $class;
+ $self->agent($ua);
+ $self;
+}
+
+
+sub parse {
+ my($self, $robot_txt_uri, $txt, $fresh_until) = @_;
+ $robot_txt_uri = URI->new("$robot_txt_uri");
+ my $netloc = $robot_txt_uri->host . ":" . $robot_txt_uri->port;
+
+ $self->clear_rules($netloc);
+ $self->fresh_until($netloc, $fresh_until || (time + 365*24*3600));
+
+ my $ua;
+ my $is_me = 0; # 1 iff this record is for me
+ my $is_anon = 0; # 1 iff this record is for *
+ my $seen_disallow = 0; # watch for missing record separators
+ my @me_disallowed = (); # rules disallowed for me
+ my @anon_disallowed = (); # rules disallowed for *
+
+ # blank lines are significant, so turn CRLF into LF to avoid generating
+ # false ones
+ $txt =~ s/\015\012/\012/g;
+
+ # split at \012 (LF) or \015 (CR) (Mac text files have just CR for EOL)
+ for(split(/[\012\015]/, $txt)) {
+
+ # Lines containing only a comment are discarded completely, and
+ # therefore do not indicate a record boundary.
+ next if /^\s*\#/;
+
+ s/\s*\#.*//; # remove comments at end-of-line
+
+ if (/^\s*$/) { # blank line
+ last if $is_me; # That was our record. No need to read the rest.
+ $is_anon = 0;
+ $seen_disallow = 0;
+ }
+ elsif (/^\s*User-Agent\s*:\s*(.*)/i) {
+ $ua = $1;
+ $ua =~ s/\s+$//;
+
+ if ($seen_disallow) {
+ # treat as start of a new record
+ $seen_disallow = 0;
+ last if $is_me; # That was our record. No need to read the rest.
+ $is_anon = 0;
+ }
+
+ if ($is_me) {
+ # This record already had a User-agent that
+ # we matched, so just continue.
+ }
+ elsif ($ua eq '*') {
+ $is_anon = 1;
+ }
+ elsif($self->is_me($ua)) {
+ $is_me = 1;
+ }
+ }
+ elsif (/^\s*Disallow\s*:\s*(.*)/i) {
+ unless (defined $ua) {
+ warn "RobotRules <$robot_txt_uri>: Disallow without preceding User-agent\n" if $^W;
+ $is_anon = 1; # assume that User-agent: * was intended
+ }
+ my $disallow = $1;
+ $disallow =~ s/\s+$//;
+ $seen_disallow = 1;
+ if (length $disallow) {
+ my $ignore;
+ eval {
+ my $u = URI->new_abs($disallow, $robot_txt_uri);
+ $ignore++ if $u->scheme ne $robot_txt_uri->scheme;
+ $ignore++ if lc($u->host) ne lc($robot_txt_uri->host);
+ $ignore++ if $u->port ne $robot_txt_uri->port;
+ $disallow = $u->path_query;
+ $disallow = "/" unless length $disallow;
+ };
+ next if $@;
+ next if $ignore;
+ }
+
+ if ($is_me) {
+ push(@me_disallowed, $disallow);
+ }
+ elsif ($is_anon) {
+ push(@anon_disallowed, $disallow);
+ }
+ }
+ elsif (/\S\s*:/) {
+ # ignore
+ }
+ else {
+ warn "RobotRules <$robot_txt_uri>: Malformed record: <$_>\n" if $^W;
+ }
+ }
+
+ if ($is_me) {
+ $self->push_rules($netloc, @me_disallowed);
+ }
+ else {
+ $self->push_rules($netloc, @anon_disallowed);
+ }
+}
+
+
+#
+# Returns TRUE if the given name matches the
+# name of this robot
+#
+sub is_me {
+ my($self, $ua_line) = @_;
+ my $me = $self->agent;
+
+ # See whether my short-name is a substring of the
+ # "User-Agent: ..." line that we were passed:
+
+ if(index(lc($me), lc($ua_line)) >= 0) {
+ return 1;
+ }
+ else {
+ return '';
+ }
+}
+
+
+sub allowed {
+ my($self, $uri) = @_;
+ $uri = URI->new("$uri");
+
+ return 1 unless $uri->scheme eq 'http' or $uri->scheme eq 'https';
+ # Robots.txt applies to only those schemes.
+
+ my $netloc = $uri->host . ":" . $uri->port;
+
+ my $fresh_until = $self->fresh_until($netloc);
+ return -1 if !defined($fresh_until) || $fresh_until < time;
+
+ my $str = $uri->path_query;
+ my $rule;
+ for $rule ($self->rules($netloc)) {
+ return 1 unless length $rule;
+ return 0 if index($str, $rule) == 0;
+ }
+ return 1;
+}
+
+
+# The following methods must be provided by the subclass.
+sub agent;
+sub visit;
+sub no_visits;
+sub last_visits;
+sub fresh_until;
+sub push_rules;
+sub clear_rules;
+sub rules;
+sub dump;
+
+
+
+package WWW::RobotRules::InCore;
+
+use vars qw(@ISA);
+@ISA = qw(WWW::RobotRules);
+
+
+
+sub agent {
+ my ($self, $name) = @_;
+ my $old = $self->{'ua'};
+ if ($name) {
+ # Strip it so that it's just the short name.
+ # I.e., "FooBot" => "FooBot"
+ # "FooBot/1.2" => "FooBot"
+ # "FooBot/1.2 [http://foobot.int; foo@bot.int]" => "FooBot"
+
+ $name = $1 if $name =~ m/(\S+)/; # get first word
+ $name =~ s!/.*!!; # get rid of version
+ unless ($old && $old eq $name) {
+ delete $self->{'loc'}; # all old info is now stale
+ $self->{'ua'} = $name;
+ }
+ }
+ $old;
+}
+
+
+sub visit {
+ my($self, $netloc, $time) = @_;
+ return unless $netloc;
+ $time ||= time;
+ $self->{'loc'}{$netloc}{'last'} = $time;
+ my $count = \$self->{'loc'}{$netloc}{'count'};
+ if (!defined $$count) {
+ $$count = 1;
+ }
+ else {
+ $$count++;
+ }
+}
+
+
+sub no_visits {
+ my ($self, $netloc) = @_;
+ $self->{'loc'}{$netloc}{'count'};
+}
+
+
+sub last_visit {
+ my ($self, $netloc) = @_;
+ $self->{'loc'}{$netloc}{'last'};
+}
+
+
+sub fresh_until {
+ my ($self, $netloc, $fresh_until) = @_;
+ my $old = $self->{'loc'}{$netloc}{'fresh'};
+ if (defined $fresh_until) {
+ $self->{'loc'}{$netloc}{'fresh'} = $fresh_until;
+ }
+ $old;
+}
+
+
+sub push_rules {
+ my($self, $netloc, @rules) = @_;
+ push (@{$self->{'loc'}{$netloc}{'rules'}}, @rules);
+}
+
+
+sub clear_rules {
+ my($self, $netloc) = @_;
+ delete $self->{'loc'}{$netloc}{'rules'};
+}
+
+
+sub rules {
+ my($self, $netloc) = @_;
+ if (defined $self->{'loc'}{$netloc}{'rules'}) {
+ return @{$self->{'loc'}{$netloc}{'rules'}};
+ }
+ else {
+ return ();
+ }
+}
+
+
+sub dump
+{
+ my $self = shift;
+ for (keys %$self) {
+ next if $_ eq 'loc';
+ print "$_ = $self->{$_}\n";
+ }
+ for (keys %{$self->{'loc'}}) {
+ my @rules = $self->rules($_);
+ print "$_: ", join("; ", @rules), "\n";
+ }
+}
+
+
+1;
+
+__END__
+
+
+# Bender: "Well, I don't have anything else
+# planned for today. Let's get drunk!"
+
+=head1 NAME
+
+WWW::RobotRules - database of robots.txt-derived permissions
+
+=head1 SYNOPSIS
+
+ use WWW::RobotRules;
+ my $rules = WWW::RobotRules->new('MOMspider/1.0');
+
+ use LWP::Simple qw(get);
+
+ {
+ my $url = "http://some.place/robots.txt";
+ my $robots_txt = get $url;
+ $rules->parse($url, $robots_txt) if defined $robots_txt;
+ }
+
+ {
+ my $url = "http://some.other.place/robots.txt";
+ my $robots_txt = get $url;
+ $rules->parse($url, $robots_txt) if defined $robots_txt;
+ }
+
+ # Now we can check if a URL is valid for those servers
+ # whose "robots.txt" files we've gotten and parsed:
+ if($rules->allowed($url)) {
+ $c = get $url;
+ ...
+ }
+
+=head1 DESCRIPTION
+
+This module parses F</robots.txt> files as specified in
+"A Standard for Robot Exclusion", at
+<http://www.robotstxt.org/wc/norobots.html>
+Webmasters can use the F</robots.txt> file to forbid conforming
+robots from accessing parts of their web site.
+
+The parsed files are kept in a WWW::RobotRules object, and this object
+provides methods to check if access to a given URL is prohibited. The
+same WWW::RobotRules object can be used for one or more parsed
+F</robots.txt> files on any number of hosts.
+
+The following methods are provided:
+
+=over 4
+
+=item $rules = WWW::RobotRules->new($robot_name)
+
+This is the constructor for WWW::RobotRules objects. The first
+argument given to new() is the name of the robot.
+
+=item $rules->parse($robot_txt_url, $content, $fresh_until)
+
+The parse() method takes as arguments the URL that was used to
+retrieve the F</robots.txt> file, and the contents of the file.
+
+=item $rules->allowed($uri)
+
+Returns TRUE if this robot is allowed to retrieve this URL.
+
+=item $rules->agent([$name])
+
+Get/set the agent name. NOTE: Changing the agent name will clear the robots.txt
+rules and expire times out of the cache.
+
+=back
+
+=head1 ROBOTS.TXT
+
+The format and semantics of the "/robots.txt" file are as follows
+(this is an edited abstract of
+<http://www.robotstxt.org/wc/norobots.html>):
+
+The file consists of one or more records separated by one or more
+blank lines. Each record contains lines of the form
+
+ <field-name>: <value>
+
+The field name is case insensitive. Text after the '#' character on a
+line is ignored during parsing. This is used for comments. The
+following <field-names> can be used:
+
+=over 3
+
+=item User-Agent
+
+The value of this field is the name of the robot the record is
+describing access policy for. If more than one I<User-Agent> field is
+present the record describes an identical access policy for more than
+one robot. At least one field needs to be present per record. If the
+value is '*', the record describes the default access policy for any
+robot that has not not matched any of the other records.
+
+The I<User-Agent> fields must occur before the I<Disallow> fields. If a
+record contains a I<User-Agent> field after a I<Disallow> field, that
+constitutes a malformed record. This parser will assume that a blank
+line should have been placed before that I<User-Agent> field, and will
+break the record into two. All the fields before the I<User-Agent> field
+will constitute a record, and the I<User-Agent> field will be the first
+field in a new record.
+
+=item Disallow
+
+The value of this field specifies a partial URL that is not to be
+visited. This can be a full path, or a partial path; any URL that
+starts with this value will not be retrieved
+
+=back
+
+Unrecognized records are ignored.
+
+=head1 ROBOTS.TXT EXAMPLES
+
+The following example "/robots.txt" file specifies that no robots
+should visit any URL starting with "/cyberworld/map/" or "/tmp/":
+
+ User-agent: *
+ Disallow: /cyberworld/map/ # This is an infinite virtual URL space
+ Disallow: /tmp/ # these will soon disappear
+
+This example "/robots.txt" file specifies that no robots should visit
+any URL starting with "/cyberworld/map/", except the robot called
+"cybermapper":
+
+ User-agent: *
+ Disallow: /cyberworld/map/ # This is an infinite virtual URL space
+
+ # Cybermapper knows where to go.
+ User-agent: cybermapper
+ Disallow:
+
+This example indicates that no robots should visit this site further:
+
+ # go away
+ User-agent: *
+ Disallow: /
+
+This is an example of a malformed robots.txt file.
+
+ # robots.txt for ancientcastle.example.com
+ # I've locked myself away.
+ User-agent: *
+ Disallow: /
+ # The castle is your home now, so you can go anywhere you like.
+ User-agent: Belle
+ Disallow: /west-wing/ # except the west wing!
+ # It's good to be the Prince...
+ User-agent: Beast
+ Disallow:
+
+This file is missing the required blank lines between records.
+However, the intention is clear.
+
+=head1 SEE ALSO
+
+L<LWP::RobotUA>, L<WWW::RobotRules::AnyDBM_File>
--- /dev/null
+package WWW::RobotRules::AnyDBM_File;
+
+require WWW::RobotRules;
+@ISA = qw(WWW::RobotRules);
+$VERSION = "5.835";
+
+use Carp ();
+use AnyDBM_File;
+use Fcntl;
+use strict;
+
+=head1 NAME
+
+WWW::RobotRules::AnyDBM_File - Persistent RobotRules
+
+=head1 SYNOPSIS
+
+ require WWW::RobotRules::AnyDBM_File;
+ require LWP::RobotUA;
+
+ # Create a robot useragent that uses a diskcaching RobotRules
+ my $rules = WWW::RobotRules::AnyDBM_File->new( 'my-robot/1.0', 'cachefile' );
+ my $ua = WWW::RobotUA->new( 'my-robot/1.0', 'me@foo.com', $rules );
+
+ # Then just use $ua as usual
+ $res = $ua->request($req);
+
+=head1 DESCRIPTION
+
+This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File
+package to implement persistent diskcaching of F<robots.txt> and host
+visit information.
+
+The constructor (the new() method) takes an extra argument specifying
+the name of the DBM file to use. If the DBM file already exists, then
+you can specify undef as agent name as the name can be obtained from
+the DBM database.
+
+=cut
+
+sub new
+{
+ my ($class, $ua, $file) = @_;
+ Carp::croak('WWW::RobotRules::AnyDBM_File filename required') unless $file;
+
+ my $self = bless { }, $class;
+ $self->{'filename'} = $file;
+ tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_CREAT|O_RDWR, 0640
+ or Carp::croak("Can't open $file: $!");
+
+ if ($ua) {
+ $self->agent($ua);
+ }
+ else {
+ # Try to obtain name from DBM file
+ $ua = $self->{'dbm'}{"|ua-name|"};
+ Carp::croak("No agent name specified") unless $ua;
+ }
+
+ $self;
+}
+
+sub agent {
+ my($self, $newname) = @_;
+ my $old = $self->{'dbm'}{"|ua-name|"};
+ if (defined $newname) {
+ $newname =~ s!/?\s*\d+.\d+\s*$!!; # loose version
+ unless ($old && $old eq $newname) {
+ # Old info is now stale.
+ my $file = $self->{'filename'};
+ untie %{$self->{'dbm'}};
+ tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640;
+ %{$self->{'dbm'}} = ();
+ $self->{'dbm'}{"|ua-name|"} = $newname;
+ }
+ }
+ $old;
+}
+
+sub no_visits {
+ my ($self, $netloc) = @_;
+ my $t = $self->{'dbm'}{"$netloc|vis"};
+ return 0 unless $t;
+ (split(/;\s*/, $t))[0];
+}
+
+sub last_visit {
+ my ($self, $netloc) = @_;
+ my $t = $self->{'dbm'}{"$netloc|vis"};
+ return undef unless $t;
+ (split(/;\s*/, $t))[1];
+}
+
+sub fresh_until {
+ my ($self, $netloc, $fresh) = @_;
+ my $old = $self->{'dbm'}{"$netloc|exp"};
+ if ($old) {
+ $old =~ s/;.*//; # remove cleartext
+ }
+ if (defined $fresh) {
+ $fresh .= "; " . localtime($fresh);
+ $self->{'dbm'}{"$netloc|exp"} = $fresh;
+ }
+ $old;
+}
+
+sub visit {
+ my($self, $netloc, $time) = @_;
+ $time ||= time;
+
+ my $count = 0;
+ my $old = $self->{'dbm'}{"$netloc|vis"};
+ if ($old) {
+ my $last;
+ ($count,$last) = split(/;\s*/, $old);
+ $time = $last if $last > $time;
+ }
+ $count++;
+ $self->{'dbm'}{"$netloc|vis"} = "$count; $time; " . localtime($time);
+}
+
+sub push_rules {
+ my($self, $netloc, @rules) = @_;
+ my $cnt = 1;
+ $cnt++ while $self->{'dbm'}{"$netloc|r$cnt"};
+
+ foreach (@rules) {
+ $self->{'dbm'}{"$netloc|r$cnt"} = $_;
+ $cnt++;
+ }
+}
+
+sub clear_rules {
+ my($self, $netloc) = @_;
+ my $cnt = 1;
+ while ($self->{'dbm'}{"$netloc|r$cnt"}) {
+ delete $self->{'dbm'}{"$netloc|r$cnt"};
+ $cnt++;
+ }
+}
+
+sub rules {
+ my($self, $netloc) = @_;
+ my @rules = ();
+ my $cnt = 1;
+ while (1) {
+ my $rule = $self->{'dbm'}{"$netloc|r$cnt"};
+ last unless $rule;
+ push(@rules, $rule);
+ $cnt++;
+ }
+ @rules;
+}
+
+sub dump
+{
+}
+
+1;
+
+=head1 SEE ALSO
+
+L<WWW::RobotRules>, L<LWP::RobotUA>
+
+=head1 AUTHORS
+
+Hakan Ardo E<lt>hakan@munin.ub2.lu.se>, Gisle Aas E<lt>aas@sn.no>
+
+=cut
+
--- /dev/null
+=head1 NAME
+
+lwpcook - The libwww-perl cookbook
+
+=head1 DESCRIPTION
+
+This document contain some examples that show typical usage of the
+libwww-perl library. You should consult the documentation for the
+individual modules for more detail.
+
+All examples should be runnable programs. You can, in most cases, test
+the code sections by piping the program text directly to perl.
+
+
+
+=head1 GET
+
+It is very easy to use this library to just fetch documents from the
+net. The LWP::Simple module provides the get() function that return
+the document specified by its URL argument:
+
+ use LWP::Simple;
+ $doc = get 'http://www.linpro.no/lwp/';
+
+or, as a perl one-liner using the getprint() function:
+
+ perl -MLWP::Simple -e 'getprint "http://www.linpro.no/lwp/"'
+
+or, how about fetching the latest perl by running this command:
+
+ perl -MLWP::Simple -e '
+ getstore "ftp://ftp.sunet.se/pub/lang/perl/CPAN/src/latest.tar.gz",
+ "perl.tar.gz"'
+
+You will probably first want to find a CPAN site closer to you by
+running something like the following command:
+
+ perl -MLWP::Simple -e 'getprint "http://www.perl.com/perl/CPAN/CPAN.html"'
+
+Enough of this simple stuff! The LWP object oriented interface gives
+you more control over the request sent to the server. Using this
+interface you have full control over headers sent and how you want to
+handle the response returned.
+
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+ $ua->agent("$0/0.1 " . $ua->agent);
+ # $ua->agent("Mozilla/8.0") # pretend we are very capable browser
+
+ $req = HTTP::Request->new(GET => 'http://www.linpro.no/lwp');
+ $req->header('Accept' => 'text/html');
+
+ # send request
+ $res = $ua->request($req);
+
+ # check the outcome
+ if ($res->is_success) {
+ print $res->decoded_content;
+ }
+ else {
+ print "Error: " . $res->status_line . "\n";
+ }
+
+The lwp-request program (alias GET) that is distributed with the
+library can also be used to fetch documents from WWW servers.
+
+
+
+=head1 HEAD
+
+If you just want to check if a document is present (i.e. the URL is
+valid) try to run code that looks like this:
+
+ use LWP::Simple;
+
+ if (head($url)) {
+ # ok document exists
+ }
+
+The head() function really returns a list of meta-information about
+the document. The first three values of the list returned are the
+document type, the size of the document, and the age of the document.
+
+More control over the request or access to all header values returned
+require that you use the object oriented interface described for GET
+above. Just s/GET/HEAD/g.
+
+
+=head1 POST
+
+There is no simple procedural interface for posting data to a WWW server. You
+must use the object oriented interface for this. The most common POST
+operation is to access a WWW form application:
+
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+
+ my $req = HTTP::Request->new(POST => 'http://www.perl.com/cgi-bin/BugGlimpse');
+ $req->content_type('application/x-www-form-urlencoded');
+ $req->content('match=www&errors=0');
+
+ my $res = $ua->request($req);
+ print $res->as_string;
+
+Lazy people use the HTTP::Request::Common module to set up a suitable
+POST request message (it handles all the escaping issues) and has a
+suitable default for the content_type:
+
+ use HTTP::Request::Common qw(POST);
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+
+ my $req = POST 'http://www.perl.com/cgi-bin/BugGlimpse',
+ [ search => 'www', errors => 0 ];
+
+ print $ua->request($req)->as_string;
+
+The lwp-request program (alias POST) that is distributed with the
+library can also be used for posting data.
+
+
+
+=head1 PROXIES
+
+Some sites use proxies to go through fire wall machines, or just as
+cache in order to improve performance. Proxies can also be used for
+accessing resources through protocols not supported directly (or
+supported badly :-) by the libwww-perl library.
+
+You should initialize your proxy setting before you start sending
+requests:
+
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+ $ua->env_proxy; # initialize from environment variables
+ # or
+ $ua->proxy(ftp => 'http://proxy.myorg.com');
+ $ua->proxy(wais => 'http://proxy.myorg.com');
+ $ua->no_proxy(qw(no se fi));
+
+ my $req = HTTP::Request->new(GET => 'wais://xxx.com/');
+ print $ua->request($req)->as_string;
+
+The LWP::Simple interface will call env_proxy() for you automatically.
+Applications that use the $ua->env_proxy() method will normally not
+use the $ua->proxy() and $ua->no_proxy() methods.
+
+Some proxies also require that you send it a username/password in
+order to let requests through. You should be able to add the
+required header, with something like this:
+
+ use LWP::UserAgent;
+
+ $ua = LWP::UserAgent->new;
+ $ua->proxy(['http', 'ftp'] => 'http://username:password@proxy.myorg.com');
+
+ $req = HTTP::Request->new('GET',"http://www.perl.com");
+
+ $res = $ua->request($req);
+ print $res->decoded_content if $res->is_success;
+
+Replace C<proxy.myorg.com>, C<username> and
+C<password> with something suitable for your site.
+
+
+=head1 ACCESS TO PROTECTED DOCUMENTS
+
+Documents protected by basic authorization can easily be accessed
+like this:
+
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+ $req = HTTP::Request->new(GET => 'http://www.linpro.no/secret/');
+ $req->authorization_basic('aas', 'mypassword');
+ print $ua->request($req)->as_string;
+
+The other alternative is to provide a subclass of I<LWP::UserAgent> that
+overrides the get_basic_credentials() method. Study the I<lwp-request>
+program for an example of this.
+
+
+=head1 COOKIES
+
+Some sites like to play games with cookies. By default LWP ignores
+cookies provided by the servers it visits. LWP will collect cookies
+and respond to cookie requests if you set up a cookie jar.
+
+ use LWP::UserAgent;
+ use HTTP::Cookies;
+
+ $ua = LWP::UserAgent->new;
+ $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt",
+ autosave => 1));
+
+ # and then send requests just as you used to do
+ $res = $ua->request(HTTP::Request->new(GET => "http://www.yahoo.no"));
+ print $res->status_line, "\n";
+
+As you visit sites that send you cookies to keep, then the file
+F<lwpcookies.txt"> will grow.
+
+=head1 HTTPS
+
+URLs with https scheme are accessed in exactly the same way as with
+http scheme, provided that an SSL interface module for LWP has been
+properly installed (see the F<README.SSL> file found in the
+libwww-perl distribution for more details). If no SSL interface is
+installed for LWP to use, then you will get "501 Protocol scheme
+'https' is not supported" errors when accessing such URLs.
+
+Here's an example of fetching and printing a WWW page using SSL:
+
+ use LWP::UserAgent;
+
+ my $ua = LWP::UserAgent->new;
+ my $req = HTTP::Request->new(GET => 'https://www.helsinki.fi/');
+ my $res = $ua->request($req);
+ if ($res->is_success) {
+ print $res->as_string;
+ }
+ else {
+ print "Failed: ", $res->status_line, "\n";
+ }
+
+=head1 MIRRORING
+
+If you want to mirror documents from a WWW server, then try to run
+code similar to this at regular intervals:
+
+ use LWP::Simple;
+
+ %mirrors = (
+ 'http://www.sn.no/' => 'sn.html',
+ 'http://www.perl.com/' => 'perl.html',
+ 'http://www.sn.no/libwww-perl/' => 'lwp.html',
+ 'gopher://gopher.sn.no/' => 'gopher.html',
+ );
+
+ while (($url, $localfile) = each(%mirrors)) {
+ mirror($url, $localfile);
+ }
+
+Or, as a perl one-liner:
+
+ perl -MLWP::Simple -e 'mirror("http://www.perl.com/", "perl.html")';
+
+The document will not be transferred unless it has been updated.
+
+
+
+=head1 LARGE DOCUMENTS
+
+If the document you want to fetch is too large to be kept in memory,
+then you have two alternatives. You can instruct the library to write
+the document content to a file (second $ua->request() argument is a file
+name):
+
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+
+ my $req = HTTP::Request->new(GET =>
+ 'http://www.linpro.no/lwp/libwww-perl-5.46.tar.gz');
+ $res = $ua->request($req, "libwww-perl.tar.gz");
+ if ($res->is_success) {
+ print "ok\n";
+ }
+ else {
+ print $res->status_line, "\n";
+ }
+
+
+Or you can process the document as it arrives (second $ua->request()
+argument is a code reference):
+
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+ $URL = 'ftp://ftp.unit.no/pub/rfc/rfc-index.txt';
+
+ my $expected_length;
+ my $bytes_received = 0;
+ my $res =
+ $ua->request(HTTP::Request->new(GET => $URL),
+ sub {
+ my($chunk, $res) = @_;
+ $bytes_received += length($chunk);
+ unless (defined $expected_length) {
+ $expected_length = $res->content_length || 0;
+ }
+ if ($expected_length) {
+ printf STDERR "%d%% - ",
+ 100 * $bytes_received / $expected_length;
+ }
+ print STDERR "$bytes_received bytes received\n";
+
+ # XXX Should really do something with the chunk itself
+ # print $chunk;
+ });
+ print $res->status_line, "\n";
+
+
+
+=head1 COPYRIGHT
+
+Copyright 1996-2001, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+
--- /dev/null
+=head1 NAME
+
+lwptut -- An LWP Tutorial
+
+=head1 DESCRIPTION
+
+LWP (short for "Library for WWW in Perl") is a very popular group of
+Perl modules for accessing data on the Web. Like most Perl
+module-distributions, each of LWP's component modules comes with
+documentation that is a complete reference to its interface. However,
+there are so many modules in LWP that it's hard to know where to start
+looking for information on how to do even the simplest most common
+things.
+
+Really introducing you to using LWP would require a whole book -- a book
+that just happens to exist, called I<Perl & LWP>. But this article
+should give you a taste of how you can go about some common tasks with
+LWP.
+
+
+=head2 Getting documents with LWP::Simple
+
+If you just want to get what's at a particular URL, the simplest way
+to do it is LWP::Simple's functions.
+
+In a Perl program, you can call its C<get($url)> function. It will try
+getting that URL's content. If it works, then it'll return the
+content; but if there's some error, it'll return undef.
+
+ my $url = 'http://freshair.npr.org/dayFA.cfm?todayDate=current';
+ # Just an example: the URL for the most recent /Fresh Air/ show
+
+ use LWP::Simple;
+ my $content = get $url;
+ die "Couldn't get $url" unless defined $content;
+
+ # Then go do things with $content, like this:
+
+ if($content =~ m/jazz/i) {
+ print "They're talking about jazz today on Fresh Air!\n";
+ }
+ else {
+ print "Fresh Air is apparently jazzless today.\n";
+ }
+
+The handiest variant on C<get> is C<getprint>, which is useful in Perl
+one-liners. If it can get the page whose URL you provide, it sends it
+to STDOUT; otherwise it complains to STDERR.
+
+ % perl -MLWP::Simple -e "getprint 'http://cpan.org/RECENT'"
+
+That is the URL of a plain text file that lists new files in CPAN in
+the past two weeks. You can easily make it part of a tidy little
+shell command, like this one that mails you the list of new
+C<Acme::> modules:
+
+ % perl -MLWP::Simple -e "getprint 'http://cpan.org/RECENT'" \
+ | grep "/by-module/Acme" | mail -s "New Acme modules! Joy!" $USER
+
+There are other useful functions in LWP::Simple, including one function
+for running a HEAD request on a URL (useful for checking links, or
+getting the last-revised time of a URL), and two functions for
+saving/mirroring a URL to a local file. See L<the LWP::Simple
+documentation|LWP::Simple> for the full details, or chapter 2 of I<Perl
+& LWP> for more examples.
+
+
+
+=for comment
+ ##########################################################################
+
+
+
+=head2 The Basics of the LWP Class Model
+
+LWP::Simple's functions are handy for simple cases, but its functions
+don't support cookies or authorization, don't support setting header
+lines in the HTTP request, generally don't support reading header lines
+in the HTTP response (notably the full HTTP error message, in case of an
+error). To get at all those features, you'll have to use the full LWP
+class model.
+
+While LWP consists of dozens of classes, the main two that you have to
+understand are L<LWP::UserAgent> and L<HTTP::Response>. LWP::UserAgent
+is a class for "virtual browsers" which you use for performing requests,
+and L<HTTP::Response> is a class for the responses (or error messages)
+that you get back from those requests.
+
+The basic idiom is C<< $response = $browser->get($url) >>, or more fully
+illustrated:
+
+ # Early in your program:
+
+ use LWP 5.64; # Loads all important LWP classes, and makes
+ # sure your version is reasonably recent.
+
+ my $browser = LWP::UserAgent->new;
+
+ ...
+
+ # Then later, whenever you need to make a get request:
+ my $url = 'http://freshair.npr.org/dayFA.cfm?todayDate=current';
+
+ my $response = $browser->get( $url );
+ die "Can't get $url -- ", $response->status_line
+ unless $response->is_success;
+
+ die "Hey, I was expecting HTML, not ", $response->content_type
+ unless $response->content_type eq 'text/html';
+ # or whatever content-type you're equipped to deal with
+
+ # Otherwise, process the content somehow:
+
+ if($response->decoded_content =~ m/jazz/i) {
+ print "They're talking about jazz today on Fresh Air!\n";
+ }
+ else {
+ print "Fresh Air is apparently jazzless today.\n";
+ }
+
+There are two objects involved: C<$browser>, which holds an object of
+class LWP::UserAgent, and then the C<$response> object, which is of
+class HTTP::Response. You really need only one browser object per
+program; but every time you make a request, you get back a new
+HTTP::Response object, which will have some interesting attributes:
+
+=over
+
+=item *
+
+A status code indicating
+success or failure
+(which you can test with C<< $response->is_success >>).
+
+=item *
+
+An HTTP status
+line that is hopefully informative if there's failure (which you can
+see with C<< $response->status_line >>,
+returning something like "404 Not Found").
+
+=item *
+
+A MIME content-type like "text/html", "image/gif",
+"application/xml", etc., which you can see with
+C<< $response->content_type >>
+
+=item *
+
+The actual content of the response, in C<< $response->decoded_content >>.
+If the response is HTML, that's where the HTML source will be; if
+it's a GIF, then C<< $response->decoded_content >> will be the binary
+GIF data.
+
+=item *
+
+And dozens of other convenient and more specific methods that are
+documented in the docs for L<HTTP::Response>, and its superclasses
+L<HTTP::Message> and L<HTTP::Headers>.
+
+=back
+
+
+
+=for comment
+ ##########################################################################
+
+
+
+=head2 Adding Other HTTP Request Headers
+
+The most commonly used syntax for requests is C<< $response =
+$browser->get($url) >>, but in truth, you can add extra HTTP header
+lines to the request by adding a list of key-value pairs after the URL,
+like so:
+
+ $response = $browser->get( $url, $key1, $value1, $key2, $value2, ... );
+
+For example, here's how to send some more Netscape-like headers, in case
+you're dealing with a site that would otherwise reject your request:
+
+
+ my @ns_headers = (
+ 'User-Agent' => 'Mozilla/4.76 [en] (Win98; U)',
+ 'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
+ 'Accept-Charset' => 'iso-8859-1,*,utf-8',
+ 'Accept-Language' => 'en-US',
+ );
+
+ ...
+
+ $response = $browser->get($url, @ns_headers);
+
+If you weren't reusing that array, you could just go ahead and do this:
+
+ $response = $browser->get($url,
+ 'User-Agent' => 'Mozilla/4.76 [en] (Win98; U)',
+ 'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
+ 'Accept-Charset' => 'iso-8859-1,*,utf-8',
+ 'Accept-Language' => 'en-US',
+ );
+
+If you were only ever changing the 'User-Agent' line, you could just change
+the C<$browser> object's default line from "libwww-perl/5.65" (or the like)
+to whatever you like, using the LWP::UserAgent C<agent> method:
+
+ $browser->agent('Mozilla/4.76 [en] (Win98; U)');
+
+
+
+=for comment
+ ##########################################################################
+
+
+
+=head2 Enabling Cookies
+
+A default LWP::UserAgent object acts like a browser with its cookies
+support turned off. There are various ways of turning it on, by setting
+its C<cookie_jar> attribute. A "cookie jar" is an object representing
+a little database of all
+the HTTP cookies that a browser can know about. It can correspond to a
+file on disk (the way Netscape uses its F<cookies.txt> file), or it can
+be just an in-memory object that starts out empty, and whose collection of
+cookies will disappear once the program is finished running.
+
+To give a browser an in-memory empty cookie jar, you set its C<cookie_jar>
+attribute like so:
+
+ $browser->cookie_jar({});
+
+To give it a copy that will be read from a file on disk, and will be saved
+to it when the program is finished running, set the C<cookie_jar> attribute
+like this:
+
+ use HTTP::Cookies;
+ $browser->cookie_jar( HTTP::Cookies->new(
+ 'file' => '/some/where/cookies.lwp',
+ # where to read/write cookies
+ 'autosave' => 1,
+ # save it to disk when done
+ ));
+
+That file will be an LWP-specific format. If you want to be access the
+cookies in your Netscape cookies file, you can use the
+HTTP::Cookies::Netscape class:
+
+ use HTTP::Cookies;
+ # yes, loads HTTP::Cookies::Netscape too
+
+ $browser->cookie_jar( HTTP::Cookies::Netscape->new(
+ 'file' => 'c:/Program Files/Netscape/Users/DIR-NAME-HERE/cookies.txt',
+ # where to read cookies
+ ));
+
+You could add an C<< 'autosave' => 1 >> line as further above, but at
+time of writing, it's uncertain whether Netscape might discard some of
+the cookies you could be writing back to disk.
+
+
+
+=for comment
+ ##########################################################################
+
+
+
+=head2 Posting Form Data
+
+Many HTML forms send data to their server using an HTTP POST request, which
+you can send with this syntax:
+
+ $response = $browser->post( $url,
+ [
+ formkey1 => value1,
+ formkey2 => value2,
+ ...
+ ],
+ );
+
+Or if you need to send HTTP headers:
+
+ $response = $browser->post( $url,
+ [
+ formkey1 => value1,
+ formkey2 => value2,
+ ...
+ ],
+ headerkey1 => value1,
+ headerkey2 => value2,
+ );
+
+For example, the following program makes a search request to AltaVista
+(by sending some form data via an HTTP POST request), and extracts from
+the HTML the report of the number of matches:
+
+ use strict;
+ use warnings;
+ use LWP 5.64;
+ my $browser = LWP::UserAgent->new;
+
+ my $word = 'tarragon';
+
+ my $url = 'http://www.altavista.com/sites/search/web';
+ my $response = $browser->post( $url,
+ [ 'q' => $word, # the Altavista query string
+ 'pg' => 'q', 'avkw' => 'tgz', 'kl' => 'XX',
+ ]
+ );
+ die "$url error: ", $response->status_line
+ unless $response->is_success;
+ die "Weird content type at $url -- ", $response->content_type
+ unless $response->content_is_html;
+
+ if( $response->decoded_content =~ m{AltaVista found ([0-9,]+) results} ) {
+ # The substring will be like "AltaVista found 2,345 results"
+ print "$word: $1\n";
+ }
+ else {
+ print "Couldn't find the match-string in the response\n";
+ }
+
+
+
+=for comment
+ ##########################################################################
+
+
+
+=head2 Sending GET Form Data
+
+Some HTML forms convey their form data not by sending the data
+in an HTTP POST request, but by making a normal GET request with
+the data stuck on the end of the URL. For example, if you went to
+C<imdb.com> and ran a search on "Blade Runner", the URL you'd see
+in your browser window would be:
+
+ http://us.imdb.com/Tsearch?title=Blade%20Runner&restrict=Movies+and+TV
+
+To run the same search with LWP, you'd use this idiom, which involves
+the URI class:
+
+ use URI;
+ my $url = URI->new( 'http://us.imdb.com/Tsearch' );
+ # makes an object representing the URL
+
+ $url->query_form( # And here the form data pairs:
+ 'title' => 'Blade Runner',
+ 'restrict' => 'Movies and TV',
+ );
+
+ my $response = $browser->get($url);
+
+See chapter 5 of I<Perl & LWP> for a longer discussion of HTML forms
+and of form data, and chapters 6 through 9 for a longer discussion of
+extracting data from HTML.
+
+
+
+=head2 Absolutizing URLs
+
+The URI class that we just mentioned above provides all sorts of methods
+for accessing and modifying parts of URLs (such as asking sort of URL it
+is with C<< $url->scheme >>, and asking what host it refers to with C<<
+$url->host >>, and so on, as described in L<the docs for the URI
+class|URI>. However, the methods of most immediate interest
+are the C<query_form> method seen above, and now the C<new_abs> method
+for taking a probably-relative URL string (like "../foo.html") and getting
+back an absolute URL (like "http://www.perl.com/stuff/foo.html"), as
+shown here:
+
+ use URI;
+ $abs = URI->new_abs($maybe_relative, $base);
+
+For example, consider this program that matches URLs in the HTML
+list of new modules in CPAN:
+
+ use strict;
+ use warnings;
+ use LWP;
+ my $browser = LWP::UserAgent->new;
+
+ my $url = 'http://www.cpan.org/RECENT.html';
+ my $response = $browser->get($url);
+ die "Can't get $url -- ", $response->status_line
+ unless $response->is_success;
+
+ my $html = $response->decoded_content;
+ while( $html =~ m/<A HREF=\"(.*?)\"/g ) {
+ print "$1\n";
+ }
+
+When run, it emits output that starts out something like this:
+
+ MIRRORING.FROM
+ RECENT
+ RECENT.html
+ authors/00whois.html
+ authors/01mailrc.txt.gz
+ authors/id/A/AA/AASSAD/CHECKSUMS
+ ...
+
+However, if you actually want to have those be absolute URLs, you
+can use the URI module's C<new_abs> method, by changing the C<while>
+loop to this:
+
+ while( $html =~ m/<A HREF=\"(.*?)\"/g ) {
+ print URI->new_abs( $1, $response->base ) ,"\n";
+ }
+
+(The C<< $response->base >> method from L<HTTP::Message|HTTP::Message>
+is for returning what URL
+should be used for resolving relative URLs -- it's usually just
+the same as the URL that you requested.)
+
+That program then emits nicely absolute URLs:
+
+ http://www.cpan.org/MIRRORING.FROM
+ http://www.cpan.org/RECENT
+ http://www.cpan.org/RECENT.html
+ http://www.cpan.org/authors/00whois.html
+ http://www.cpan.org/authors/01mailrc.txt.gz
+ http://www.cpan.org/authors/id/A/AA/AASSAD/CHECKSUMS
+ ...
+
+See chapter 4 of I<Perl & LWP> for a longer discussion of URI objects.
+
+Of course, using a regexp to match hrefs is a bit simplistic, and for
+more robust programs, you'll probably want to use an HTML-parsing module
+like L<HTML::LinkExtor> or L<HTML::TokeParser> or even maybe
+L<HTML::TreeBuilder>.
+
+
+
+
+=for comment
+ ##########################################################################
+
+=head2 Other Browser Attributes
+
+LWP::UserAgent objects have many attributes for controlling how they
+work. Here are a few notable ones:
+
+=over
+
+=item *
+
+C<< $browser->timeout(15); >>
+
+This sets this browser object to give up on requests that don't answer
+within 15 seconds.
+
+
+=item *
+
+C<< $browser->protocols_allowed( [ 'http', 'gopher'] ); >>
+
+This sets this browser object to not speak any protocols other than HTTP
+and gopher. If it tries accessing any other kind of URL (like an "ftp:"
+or "mailto:" or "news:" URL), then it won't actually try connecting, but
+instead will immediately return an error code 500, with a message like
+"Access to 'ftp' URIs has been disabled".
+
+
+=item *
+
+C<< use LWP::ConnCache; $browser->conn_cache(LWP::ConnCache->new()); >>
+
+This tells the browser object to try using the HTTP/1.1 "Keep-Alive"
+feature, which speeds up requests by reusing the same socket connection
+for multiple requests to the same server.
+
+
+=item *
+
+C<< $browser->agent( 'SomeName/1.23 (more info here maybe)' ) >>
+
+This changes how the browser object will identify itself in
+the default "User-Agent" line is its HTTP requests. By default,
+it'll send "libwww-perl/I<versionnumber>", like
+"libwww-perl/5.65". You can change that to something more descriptive
+like this:
+
+ $browser->agent( 'SomeName/3.14 (contact@robotplexus.int)' );
+
+Or if need be, you can go in disguise, like this:
+
+ $browser->agent( 'Mozilla/4.0 (compatible; MSIE 5.12; Mac_PowerPC)' );
+
+
+=item *
+
+C<< push @{ $ua->requests_redirectable }, 'POST'; >>
+
+This tells this browser to obey redirection responses to POST requests
+(like most modern interactive browsers), even though the HTTP RFC says
+that should not normally be done.
+
+
+=back
+
+
+For more options and information, see L<the full documentation for
+LWP::UserAgent|LWP::UserAgent>.
+
+
+
+=for comment
+ ##########################################################################
+
+
+
+=head2 Writing Polite Robots
+
+If you want to make sure that your LWP-based program respects F<robots.txt>
+files and doesn't make too many requests too fast, you can use the LWP::RobotUA
+class instead of the LWP::UserAgent class.
+
+LWP::RobotUA class is just like LWP::UserAgent, and you can use it like so:
+
+ use LWP::RobotUA;
+ my $browser = LWP::RobotUA->new('YourSuperBot/1.34', 'you@yoursite.com');
+ # Your bot's name and your email address
+
+ my $response = $browser->get($url);
+
+But HTTP::RobotUA adds these features:
+
+
+=over
+
+=item *
+
+If the F<robots.txt> on C<$url>'s server forbids you from accessing
+C<$url>, then the C<$browser> object (assuming it's of class LWP::RobotUA)
+won't actually request it, but instead will give you back (in C<$response>) a 403 error
+with a message "Forbidden by robots.txt". That is, if you have this line:
+
+ die "$url -- ", $response->status_line, "\nAborted"
+ unless $response->is_success;
+
+then the program would die with an error message like this:
+
+ http://whatever.site.int/pith/x.html -- 403 Forbidden by robots.txt
+ Aborted at whateverprogram.pl line 1234
+
+=item *
+
+If this C<$browser> object sees that the last time it talked to
+C<$url>'s server was too recently, then it will pause (via C<sleep>) to
+avoid making too many requests too often. How long it will pause for, is
+by default one minute -- but you can control it with the C<<
+$browser->delay( I<minutes> ) >> attribute.
+
+For example, this code:
+
+ $browser->delay( 7/60 );
+
+...means that this browser will pause when it needs to avoid talking to
+any given server more than once every 7 seconds.
+
+=back
+
+For more options and information, see L<the full documentation for
+LWP::RobotUA|LWP::RobotUA>.
+
+
+
+
+
+=for comment
+ ##########################################################################
+
+=head2 Using Proxies
+
+In some cases, you will want to (or will have to) use proxies for
+accessing certain sites and/or using certain protocols. This is most
+commonly the case when your LWP program is running (or could be running)
+on a machine that is behind a firewall.
+
+To make a browser object use proxies that are defined in the usual
+environment variables (C<HTTP_PROXY>, etc.), just call the C<env_proxy>
+on a user-agent object before you go making any requests on it.
+Specifically:
+
+ use LWP::UserAgent;
+ my $browser = LWP::UserAgent->new;
+
+ # And before you go making any requests:
+ $browser->env_proxy;
+
+For more information on proxy parameters, see L<the LWP::UserAgent
+documentation|LWP::UserAgent>, specifically the C<proxy>, C<env_proxy>,
+and C<no_proxy> methods.
+
+
+
+=for comment
+ ##########################################################################
+
+=head2 HTTP Authentication
+
+Many web sites restrict access to documents by using "HTTP
+Authentication". This isn't just any form of "enter your password"
+restriction, but is a specific mechanism where the HTTP server sends the
+browser an HTTP code that says "That document is part of a protected
+'realm', and you can access it only if you re-request it and add some
+special authorization headers to your request".
+
+For example, the Unicode.org admins stop email-harvesting bots from
+harvesting the contents of their mailing list archives, by protecting
+them with HTTP Authentication, and then publicly stating the username
+and password (at C<http://www.unicode.org/mail-arch/>) -- namely
+username "unicode-ml" and password "unicode".
+
+For example, consider this URL, which is part of the protected
+area of the web site:
+
+ http://www.unicode.org/mail-arch/unicode-ml/y2002-m08/0067.html
+
+If you access that with a browser, you'll get a prompt
+like
+"Enter username and password for 'Unicode-MailList-Archives' at server
+'www.unicode.org'".
+
+In LWP, if you just request that URL, like this:
+
+ use LWP;
+ my $browser = LWP::UserAgent->new;
+
+ my $url =
+ 'http://www.unicode.org/mail-arch/unicode-ml/y2002-m08/0067.html';
+ my $response = $browser->get($url);
+
+ die "Error: ", $response->header('WWW-Authenticate') || 'Error accessing',
+ # ('WWW-Authenticate' is the realm-name)
+ "\n ", $response->status_line, "\n at $url\n Aborting"
+ unless $response->is_success;
+
+Then you'll get this error:
+
+ Error: Basic realm="Unicode-MailList-Archives"
+ 401 Authorization Required
+ at http://www.unicode.org/mail-arch/unicode-ml/y2002-m08/0067.html
+ Aborting at auth1.pl line 9. [or wherever]
+
+...because the C<$browser> doesn't know any the username and password
+for that realm ("Unicode-MailList-Archives") at that host
+("www.unicode.org"). The simplest way to let the browser know about this
+is to use the C<credentials> method to let it know about a username and
+password that it can try using for that realm at that host. The syntax is:
+
+ $browser->credentials(
+ 'servername:portnumber',
+ 'realm-name',
+ 'username' => 'password'
+ );
+
+In most cases, the port number is 80, the default TCP/IP port for HTTP; and
+you usually call the C<credentials> method before you make any requests.
+For example:
+
+ $browser->credentials(
+ 'reports.mybazouki.com:80',
+ 'web_server_usage_reports',
+ 'plinky' => 'banjo123'
+ );
+
+So if we add the following to the program above, right after the C<<
+$browser = LWP::UserAgent->new; >> line...
+
+ $browser->credentials( # add this to our $browser 's "key ring"
+ 'www.unicode.org:80',
+ 'Unicode-MailList-Archives',
+ 'unicode-ml' => 'unicode'
+ );
+
+...then when we run it, the request succeeds, instead of causing the
+C<die> to be called.
+
+
+
+=for comment
+ ##########################################################################
+
+=head2 Accessing HTTPS URLs
+
+When you access an HTTPS URL, it'll work for you just like an HTTP URL
+would -- if your LWP installation has HTTPS support (via an appropriate
+Secure Sockets Layer library). For example:
+
+ use LWP;
+ my $url = 'https://www.paypal.com/'; # Yes, HTTPS!
+ my $browser = LWP::UserAgent->new;
+ my $response = $browser->get($url);
+ die "Error at $url\n ", $response->status_line, "\n Aborting"
+ unless $response->is_success;
+ print "Whee, it worked! I got that ",
+ $response->content_type, " document!\n";
+
+If your LWP installation doesn't have HTTPS support set up, then the
+response will be unsuccessful, and you'll get this error message:
+
+ Error at https://www.paypal.com/
+ 501 Protocol scheme 'https' is not supported
+ Aborting at paypal.pl line 7. [or whatever program and line]
+
+If your LWP installation I<does> have HTTPS support installed, then the
+response should be successful, and you should be able to consult
+C<$response> just like with any normal HTTP response.
+
+For information about installing HTTPS support for your LWP
+installation, see the helpful F<README.SSL> file that comes in the
+libwww-perl distribution.
+
+
+=for comment
+ ##########################################################################
+
+
+
+=head2 Getting Large Documents
+
+When you're requesting a large (or at least potentially large) document,
+a problem with the normal way of using the request methods (like C<<
+$response = $browser->get($url) >>) is that the response object in
+memory will have to hold the whole document -- I<in memory>. If the
+response is a thirty megabyte file, this is likely to be quite an
+imposition on this process's memory usage.
+
+A notable alternative is to have LWP save the content to a file on disk,
+instead of saving it up in memory. This is the syntax to use:
+
+ $response = $ua->get($url,
+ ':content_file' => $filespec,
+ );
+
+For example,
+
+ $response = $ua->get('http://search.cpan.org/',
+ ':content_file' => '/tmp/sco.html'
+ );
+
+When you use this C<:content_file> option, the C<$response> will have
+all the normal header lines, but C<< $response->content >> will be
+empty.
+
+Note that this ":content_file" option isn't supported under older
+versions of LWP, so you should consider adding C<use LWP 5.66;> to check
+the LWP version, if you think your program might run on systems with
+older versions.
+
+If you need to be compatible with older LWP versions, then use
+this syntax, which does the same thing:
+
+ use HTTP::Request::Common;
+ $response = $ua->request( GET($url), $filespec );
+
+
+=for comment
+ ##########################################################################
+
+
+=head1 SEE ALSO
+
+Remember, this article is just the most rudimentary introduction to
+LWP -- to learn more about LWP and LWP-related tasks, you really
+must read from the following:
+
+=over
+
+=item *
+
+L<LWP::Simple> -- simple functions for getting/heading/mirroring URLs
+
+=item *
+
+L<LWP> -- overview of the libwww-perl modules
+
+=item *
+
+L<LWP::UserAgent> -- the class for objects that represent "virtual browsers"
+
+=item *
+
+L<HTTP::Response> -- the class for objects that represent the response to
+a LWP response, as in C<< $response = $browser->get(...) >>
+
+=item *
+
+L<HTTP::Message> and L<HTTP::Headers> -- classes that provide more methods
+to HTTP::Response.
+
+=item *
+
+L<URI> -- class for objects that represent absolute or relative URLs
+
+=item *
+
+L<URI::Escape> -- functions for URL-escaping and URL-unescaping strings
+(like turning "this & that" to and from "this%20%26%20that").
+
+=item *
+
+L<HTML::Entities> -- functions for HTML-escaping and HTML-unescaping strings
+(like turning "C. & E. BrontE<euml>" to and from "C. & E. Brontë")
+
+=item *
+
+L<HTML::TokeParser> and L<HTML::TreeBuilder> -- classes for parsing HTML
+
+=item *
+
+L<HTML::LinkExtor> -- class for finding links in HTML documents
+
+=item *
+
+The book I<Perl & LWP> by Sean M. Burke. O'Reilly & Associates,
+2002. ISBN: 0-596-00178-9, L<http://www.oreilly.com/catalog/perllwp/>. The
+whole book is also available free online:
+L<http://lwp.interglacial.com>.
+
+=back
+
+
+=head1 COPYRIGHT
+
+Copyright 2002, Sean M. Burke. You can redistribute this document and/or
+modify it, but only under the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=for comment
+ ##########################################################################
+
+=cut
+
+# End of Pod
--- /dev/null
+PKG_NAME := perl-libwww-per
+SPECFILE = $(addsuffix .spec, $(PKG_NAME))
+YAMLFILE = $(addsuffix .yaml, $(PKG_NAME))
+
+
+include /usr/share/packaging-tools/Makefile.common
--- /dev/null
+* Sat May 21 2011 Anas Nashif <anas.nashif@intel.com> - 5.836
+- Filter out unused modules
+
+* Tue Aug 17 2010 Quanxian Wang <quanxian.wang@intel.com> - 5.836
+- update to 5.836
+
+* Wed June 30 2010 Quanxian Wang <quanxian.wang@intel.com> - 5.834
+- Add %doc before man file entry
+
+* Thu Dec 24 2009 Passion Zhao <passion.zhao@intel.com> - 5.834
+- Update to 5.834
+
+* Wed Dec 23 2009 Anas Nashif <anas.nashif@intel.com> - 5.833
+- Update to 5.833
+
+* Thu Feb 19 2009 Zhu Yanhai <yanhai.zhu@intel.com>
+- update to 5.825
+- remove rt 38736 patch, it has already been applyed to the lastest src now.
+- update the patch which disables some tests.
--- /dev/null
+#
+# Do NOT Edit the Auto-generated Part!
+# Generated by: spectacle version 0.22
+#
+# >> macros
+# << macros
+
+Name: perl-libwww-perl
+Summary: A Perl interface to the World-Wide Web
+Version: 5.836
+Release: 1
+Group: Development/Libraries
+License: GPL+ or Artistic
+BuildArch: noarch
+URL: http://search.cpan.org/dist/libwww-perl/
+Source0: http://www.cpan.org/authors/id/G/GA/GAAS/libwww-perl-%{version}.tar.gz
+Source100: perl-libwww-perl.yaml
+Requires: perl(:MODULE_COMPAT_%(eval "`%{__perl} -V:version`"; echo $version))
+Requires: perl(Compress::Zlib)
+Requires: perl-HTML-Parser >= 3.33
+BuildRequires: perl(HTML::Entities)
+BuildRequires: perl(URI)
+BuildRequires: perl(Test::More)
+BuildRequires: perl(ExtUtils::MakeMaker)
+BuildRequires: perl(Compress::Zlib)
+
+
+%description
+The libwww-perl collection is a set of Perl modules which provides a
+simple and consistent application programming interface to the
+World-Wide Web. The main focus of the library is to provide classes
+and functions that allow you to write WWW clients. The library also
+contain modules that are of more general use and even classes that
+help you implement simple HTTP servers.
+
+
+
+
+%prep
+%setup -q -n libwww-perl-%{version}
+
+# >> setup
+# Filter the automatically generated dependencies.
+%{?filter_setup:
+%filter_from_requires /^perl(Win32)/d
+%filter_from_requires /^perl(Authen::NTLM)/d
+%filter_from_requires /^perl(HTTP::GHTTP)/d
+%?perl_default_filter
+}
+# << setup
+
+%build
+# >> build pre
+# << build pre
+
+if test -f Makefile.PL; then
+%{__perl} Makefile.PL INSTALLDIRS=vendor
+#####make %{?jobs:-j%jobs}
+make
+else
+%{__perl} Build.PL --installdirs vendor
+./Build
+fi
+
+# >> build post
+# << build post
+%install
+rm -rf %{buildroot}
+# >> install pre
+
+# Use system wide MIME types (link also to blib/... for "make test"). Doing
+# << install pre
+if test -f Makefile.PL; then
+make pure_install PERL_INSTALL_ROOT=%{buildroot}
+else
+./Build install --installdirs vendor
+fi
+find %{buildroot} -type f -name .packlist -exec rm -f {} ';'
+find %{buildroot} -depth -type d -exec rmdir {} 2>/dev/null ';'
+find %{buildroot} -type f -name '*.bs' -empty -exec rm -f {} ';'
+%{_fixperms} %{buildroot}/*
+
+# >> install post
+chmod -R u+w $RPM_BUILD_ROOT/*
+for file in $RPM_BUILD_ROOT%{_mandir}/man3/LWP.3pm AUTHORS README Changes; do
+iconv -f iso-8859-1 -t utf-8 < "$file" > "${file}_"
+mv -f "${file}_" "$file"
+done
+# but a copy of /etc/mime.types.
+for file in {blib/lib,$RPM_BUILD_ROOT%{perl_vendorlib}}/LWP/media.types ; do
+[ ! -f $file ] && echo ERROR && exit 1
+ln -sf /etc/mime.types $file
+done
+
+# << install post
+%check
+# >> check
+#make test
+# << check
+
+
+
+
+
+
+%files
+%defattr(-,root,root,-)
+# >> files
+%doc AUTHORS Changes README*
+%{_bindir}/*
+%{perl_vendorlib}/lwp*.pod
+%{perl_vendorlib}/LWP.pm
+%{perl_vendorlib}/Bundle/*
+%{perl_vendorlib}/File/*
+%{perl_vendorlib}/HTML/*
+%{perl_vendorlib}/HTTP/*
+%{perl_vendorlib}/LWP/*
+%{perl_vendorlib}/Net/*
+%{perl_vendorlib}/WWW/*
+%doc %{_mandir}/man1/*.1*
+%doc %{_mandir}/man3/*.3*
+# << files
+
+
--- /dev/null
+Name: perl-libwww-perl
+Summary: A Perl interface to the World-Wide Web
+Version: 5.836
+Release: 1
+Group: Development/Libraries
+License: GPL+ or Artistic
+URL: http://search.cpan.org/dist/libwww-perl/
+Sources:
+ - http://www.cpan.org/authors/id/G/GA/GAAS/libwww-perl-%{version}.tar.gz
+Description: |
+ The libwww-perl collection is a set of Perl modules which provides a
+ simple and consistent application programming interface to the
+ World-Wide Web. The main focus of the library is to provide classes
+ and functions that allow you to write WWW clients. The library also
+ contain modules that are of more general use and even classes that
+ help you implement simple HTTP servers.
+
+Requires:
+ - perl(Compress::Zlib)
+ - perl-HTML-Parser >= 3.33
+PkgBR:
+ - perl(HTML::Entities)
+ - perl(URI)
+ - perl(Test::More)
+ - perl(ExtUtils::MakeMaker)
+ - perl(Compress::Zlib)
+ - mailcap
+Configure: none
+Builder: perl
+BuildArch: noarch
+Check: yes
--- /dev/null
+Self test suite for the libwww-perl library
+-------------------------------------------
+
+Tests are invoked by running the ./TEST script, but usually you run
+the tests with "make test" at the top libwww-perl directory. Use -v
+option for verbose tests. You might run an individual test like this:
+
+ ./TEST -v base/date
+
+or all tests in a directory like this
+
+ ./TEST base
+
+You enable network tests by creating the "net/config.pl" file. A good
+start is to make a copy of the "net/config.pl.dist" file. For network
+tests you should also make a link from the cgi-bin directory of your
+http server to the "net/cgi-bin" directory. You might also have to
+check that the interpreter line (#!.../perl) in the scripts is ok for
+your system. The following setup works for my site:
+
+ ln -s `pwd`/net/cgi-bin ~www/cgi-bin/lwp
+ cp net/config.pl.dist net/config.pl
+ emacs net/config.pl # fix if necessary
+ ./TEST net
--- /dev/null
+#!/usr/local/bin/perl -w
+
+use strict;
+use Test::Harness;
+$Test::Harness::verbose = shift
+ if defined $ARGV[0] and $ARGV[0] =~ /^\d+$/ || $ARGV[0] eq "-v";
+
+my $formatter;
+if (@ARGV && $ARGV[0] =~ /^--formatter=/) {
+ (undef, $formatter) = split(/=/, shift, 2);
+ $formatter = "TAP::Formatter::$formatter" unless $formatter =~ /::/
+}
+
+# make sure we are in the "t" directory
+unless (-d "base") {
+ chdir "t" or die "Can't chdir: $!";
+
+ # fix all relative library locations
+ foreach (@INC) {
+ $_ = "../$_" unless m,^([a-z]:)?[/\\],i;
+ }
+}
+
+unshift(@INC, "../blib/lib", "../blib/arch");
+
+my @tests;
+if (@ARGV) {
+ for (@ARGV) {
+ if (-d $_) {
+ push(@tests, <$_/*.t>);
+ }
+ else {
+ $_ .= ".t" unless /\.t$/;
+ push(@tests, $_);
+ }
+ }
+}
+else {
+ @tests = (<base/*.t>, <html/*.t>, <robot/*.t>, <local/*.t>);
+ push(@tests, <live/*.t>) if -f "live/ENABLED";
+ push(@tests, <net/*.t>) if -f "net/config.pl";
+ @tests = grep !/jigsaw/, @tests; # service is not reliable any more
+}
+
+if ($formatter) {
+ use File::Path; File::Path::rmtree("tap");
+ $ENV{PERL_TEST_HARNESS_DUMP_TAP} = "tap";
+ require TAP::Harness;
+ my $harness = TAP::Harness->new({
+ formatter_class => $formatter,
+ merge => 1,
+ #timer => 1,
+ lib => \@INC,
+ });
+ $harness->runtests(@tests);
+}
+else {
+ runtests @tests;
+}
--- /dev/null
+#perl -w
+
+use Test;
+plan tests => 52;
+
+use HTTP::Request::Common;
+
+$r = GET 'http://www.sn.no/';
+print $r->as_string;
+
+ok($r->method, "GET");
+ok($r->uri, "http://www.sn.no/");
+
+$r = HEAD "http://www.sn.no/",
+ If_Match => 'abc',
+ From => 'aas@sn.no';
+print $r->as_string;
+
+ok($r->method, "HEAD");
+ok($r->uri->eq("http://www.sn.no"));
+
+ok($r->header('If-Match'), "abc");
+ok($r->header("from"), "aas\@sn.no");
+
+$r = PUT "http://www.sn.no",
+ Content => 'foo';
+print $r->as_string, "\n";
+
+ok($r->method, "PUT");
+ok($r->uri->host, "www.sn.no");
+
+ok(!defined($r->header("Content")));
+
+ok(${$r->content_ref}, "foo");
+ok($r->content, "foo");
+ok($r->content_length, 3);
+
+#--- Test POST requests ---
+
+$r = POST "http://www.sn.no", [foo => 'bar;baz',
+ baz => [qw(a b c)],
+ foo => 'zoo=&',
+ "space " => " + ",
+ ],
+ bar => 'foo';
+print $r->as_string, "\n";
+
+ok($r->method, "POST");
+ok($r->content_type, "application/x-www-form-urlencoded");
+ok($r->content_length, 58);
+ok($r->header("bar"), "foo");
+ok($r->content, "foo=bar%3Bbaz&baz=a&baz=b&baz=c&foo=zoo%3D%26&space+=+%2B+");
+
+$r = POST "mailto:gisle\@aas.no",
+ Subject => "Heisan",
+ Content_Type => "text/plain",
+ Content => "Howdy\n";
+#print $r->as_string;
+
+ok($r->method, "POST");
+ok($r->header("Subject"), "Heisan");
+ok($r->content, "Howdy\n");
+ok($r->content_type, "text/plain");
+
+#
+# POST for File upload
+#
+$file = "test-$$";
+open(FILE, ">$file") or die "Can't create $file: $!";
+print FILE "foo\nbar\nbaz\n";
+close(FILE);
+
+$r = POST 'http://www.perl.org/survey.cgi',
+ Content_Type => 'form-data',
+ Content => [ name => 'Gisle Aas',
+ email => 'gisle@aas.no',
+ gender => 'm',
+ born => '1964',
+ file => [$file],
+ ];
+#print $r->as_string;
+
+unlink($file) or warn "Can't unlink $file: $!";
+
+ok($r->method, "POST");
+ok($r->uri->path, "/survey.cgi");
+ok($r->content_type, "multipart/form-data");
+ok($r->header(Content_type) =~ /boundary="?([^"]+)"?/);
+$boundary = $1;
+
+$c = $r->content;
+$c =~ s/\r//g;
+@c = split(/--\Q$boundary/, $c);
+print "$c[5]\n";
+
+ok(@c == 7 and $c[6] =~ /^--\n/); # 5 parts + header & trailer
+
+ok($c[2] =~ /^Content-Disposition:\s*form-data;\s*name="email"/m);
+ok($c[2] =~ /^gisle\@aas.no$/m);
+
+ok($c[5] =~ /^Content-Disposition:\s*form-data;\s*name="file";\s*filename="$file"/m);
+ok($c[5] =~ /^Content-Type:\s*text\/plain$/m);
+ok($c[5] =~ /^foo\nbar\nbaz/m);
+
+$r = POST 'http://www.perl.org/survey.cgi',
+ [ file => [ undef, "xxy\"", Content_type => "text/html", Content => "<h1>Hello, world!</h1>" ]],
+ Content_type => 'multipart/form-data';
+print $r->as_string;
+
+ok($r->content =~ /^--\S+\015\012Content-Disposition:\s*form-data;\s*name="file";\s*filename="xxy\\"/m);
+ok($r->content =~ /^Content-Type: text\/html/m);
+ok($r->content =~ /^<h1>Hello, world/m);
+
+$r = POST 'http://www.perl.org/survey.cgi',
+ Content_type => 'multipart/form-data',
+ Content => [ file => [ undef, undef, Content => "foo"]];
+#print $r->as_string;
+
+ok($r->content !~ /filename=/);
+
+
+# The POST routine can now also take a hash reference.
+my %hash = (foo => 42, bar => 24);
+$r = POST 'http://www.perl.org/survey.cgi', \%hash;
+#print $r->as_string, "\n";
+ok($r->content =~ /foo=42/);
+ok($r->content =~ /bar=24/);
+ok($r->content_type, "application/x-www-form-urlencoded");
+ok($r->content_length, 13);
+
+
+#
+# POST for File upload
+#
+use HTTP::Request::Common qw($DYNAMIC_FILE_UPLOAD);
+
+$file = "test-$$";
+open(FILE, ">$file") or die "Can't create $file: $!";
+for (1..1000) {
+ print FILE "a" .. "z";
+}
+close(FILE);
+
+$DYNAMIC_FILE_UPLOAD++;
+$r = POST 'http://www.perl.org/survey.cgi',
+ Content_Type => 'form-data',
+ Content => [ name => 'Gisle Aas',
+ email => 'gisle@aas.no',
+ gender => 'm',
+ born => '1964',
+ file => [$file],
+ ];
+print $r->as_string, "\n";
+
+ok($r->method, "POST");
+ok($r->uri->path, "/survey.cgi");
+ok($r->content_type, "multipart/form-data");
+ok($r->header(Content_type) =~ /boundary="?([^"]+)"?/);
+$boundary = $1;
+ok(ref($r->content), "CODE");
+
+ok(length($boundary) > 10);
+
+$code = $r->content;
+my $chunk;
+my @chunks;
+while (defined($chunk = &$code) && length $chunk) {
+ push(@chunks, $chunk);
+}
+
+unlink($file) or warn "Can't unlink $file: $!";
+
+$_ = join("", @chunks);
+
+print int(@chunks), " chunks, total size is ", length($_), " bytes\n";
+
+# should be close to expected size and number of chunks
+ok(abs(@chunks - 15 < 3));
+ok(abs(length($_) - 26589) < 20);
+
+$r = POST 'http://www.example.com';
+ok($r->as_string, <<EOT);
+POST http://www.example.com
+Content-Length: 0
+Content-Type: application/x-www-form-urlencoded
+
+EOT
+
+$r = POST 'http://www.example.com', Content_Type => 'form-data', Content => [];
+ok($r->as_string, <<EOT);
+POST http://www.example.com
+Content-Length: 0
+Content-Type: multipart/form-data; boundary=none
+
+EOT
+
+$r = POST 'http://www.example.com', Content_Type => 'form-data';
+#print $r->as_string;
+ok($r->as_string, <<EOT);
+POST http://www.example.com
+Content-Length: 0
+Content-Type: multipart/form-data
+
+EOT
+
+$r = HTTP::Request::Common::DELETE 'http://www.example.com';
+ok($r->method, "DELETE");
+
+$r = HTTP::Request::Common::PUT 'http://www.example.com',
+ 'Content-Type' => 'application/octet-steam',
+ 'Content' => 'foobarbaz',
+ 'Content-Length' => 12; # a slight lie
+ok($r->header('Content-Length'), 12);
--- /dev/null
+#!perl -w
+
+use Test;
+plan tests => 66;
+
+use HTTP::Cookies;
+use HTTP::Request;
+use HTTP::Response;
+
+#-------------------------------------------------------------------
+# First we check that it works for the original example at
+# http://curl.haxx.se/rfc/cookie_spec.html
+
+# Client requests a document, and receives in the response:
+#
+# Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/; expires=Wednesday, 09-Nov-99 23:12:40 GMT
+#
+# When client requests a URL in path "/" on this server, it sends:
+#
+# Cookie: CUSTOMER=WILE_E_COYOTE
+#
+# Client requests a document, and receives in the response:
+#
+# Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/
+#
+# When client requests a URL in path "/" on this server, it sends:
+#
+# Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001
+#
+# Client receives:
+#
+# Set-Cookie: SHIPPING=FEDEX; path=/fo
+#
+# When client requests a URL in path "/" on this server, it sends:
+#
+# Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001
+#
+# When client requests a URL in path "/foo" on this server, it sends:
+#
+# Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001; SHIPPING=FEDEX
+#
+# The last Cookie is buggy, because both specifications says that the
+# most specific cookie must be sent first. SHIPPING=FEDEX is the
+# most specific and should thus be first.
+
+my $year_plus_one = (localtime)[5] + 1900 + 1;
+
+$c = HTTP::Cookies->new;
+
+$req = HTTP::Request->new(GET => "http://1.1.1.1/");
+$req->header("Host", "www.acme.com:80");
+
+$res = HTTP::Response->new(200, "OK");
+$res->request($req);
+$res->header("Set-Cookie" => "CUSTOMER=WILE_E_COYOTE; path=/ ; expires=Wednesday, 09-Nov-$year_plus_one 23:12:40 GMT");
+#print $res->as_string;
+$c->extract_cookies($res);
+
+$req = HTTP::Request->new(GET => "http://www.acme.com/");
+$c->add_cookie_header($req);
+
+ok($req->header("Cookie"), "CUSTOMER=WILE_E_COYOTE");
+ok($req->header("Cookie2"), "\$Version=\"1\"");
+
+$res->request($req);
+$res->header("Set-Cookie" => "PART_NUMBER=ROCKET_LAUNCHER_0001; path=/");
+$c->extract_cookies($res);
+
+$req = HTTP::Request->new(GET => "http://www.acme.com/foo/bar");
+$c->add_cookie_header($req);
+
+$h = $req->header("Cookie");
+ok($h =~ /PART_NUMBER=ROCKET_LAUNCHER_0001/);
+ok($h =~ /CUSTOMER=WILE_E_COYOTE/);
+
+$res->request($req);
+$res->header("Set-Cookie", "SHIPPING=FEDEX; path=/foo");
+$c->extract_cookies($res);
+
+$req = HTTP::Request->new(GET => "http://www.acme.com/");
+$c->add_cookie_header($req);
+
+$h = $req->header("Cookie");
+ok($h =~ /PART_NUMBER=ROCKET_LAUNCHER_0001/);
+ok($h =~ /CUSTOMER=WILE_E_COYOTE/);
+ok($h !~ /SHIPPING=FEDEX/);
+
+
+$req = HTTP::Request->new(GET => "http://www.acme.com/foo/");
+$c->add_cookie_header($req);
+
+$h = $req->header("Cookie");
+ok($h =~ /PART_NUMBER=ROCKET_LAUNCHER_0001/);
+ok($h =~ /CUSTOMER=WILE_E_COYOTE/);
+ok($h =~ /^SHIPPING=FEDEX;/);
+
+print $c->as_string;
+
+
+# Second Example transaction sequence:
+#
+# Assume all mappings from above have been cleared.
+#
+# Client receives:
+#
+# Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/
+#
+# When client requests a URL in path "/" on this server, it sends:
+#
+# Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001
+#
+# Client receives:
+#
+# Set-Cookie: PART_NUMBER=RIDING_ROCKET_0023; path=/ammo
+#
+# When client requests a URL in path "/ammo" on this server, it sends:
+#
+# Cookie: PART_NUMBER=RIDING_ROCKET_0023; PART_NUMBER=ROCKET_LAUNCHER_0001
+#
+# NOTE: There are two name/value pairs named "PART_NUMBER" due to
+# the inheritance of the "/" mapping in addition to the "/ammo" mapping.
+
+$c = HTTP::Cookies->new; # clear it
+
+$req = HTTP::Request->new(GET => "http://www.acme.com/");
+$res = HTTP::Response->new(200, "OK");
+$res->request($req);
+$res->header("Set-Cookie", "PART_NUMBER=ROCKET_LAUNCHER_0001; path=/");
+
+$c->extract_cookies($res);
+
+$req = HTTP::Request->new(GET => "http://www.acme.com/");
+$c->add_cookie_header($req);
+
+ok($req->header("Cookie"), "PART_NUMBER=ROCKET_LAUNCHER_0001");
+
+$res->request($req);
+$res->header("Set-Cookie", "PART_NUMBER=RIDING_ROCKET_0023; path=/ammo");
+$c->extract_cookies($res);
+
+$req = HTTP::Request->new(GET => "http://www.acme.com/ammo");
+$c->add_cookie_header($req);
+
+ok($req->header("Cookie") =~
+ /^PART_NUMBER=RIDING_ROCKET_0023;\s*PART_NUMBER=ROCKET_LAUNCHER_0001/);
+
+print $c->as_string;
+undef($c);
+
+
+#-------------------------------------------------------------------
+# When there are no "Set-Cookie" header, then even responses
+# without any request URLs connected should be allowed.
+
+$c = HTTP::Cookies->new;
+$c->extract_cookies(HTTP::Response->new("200", "OK"));
+ok(count_cookies($c), 0);
+
+
+#-------------------------------------------------------------------
+# Then we test with the examples from RFC 2965.
+#
+# 5. EXAMPLES
+
+$c = HTTP::Cookies->new;
+
+#
+# 5.1 Example 1
+#
+# Most detail of request and response headers has been omitted. Assume
+# the user agent has no stored cookies.
+#
+# 1. User Agent -> Server
+#
+# POST /acme/login HTTP/1.1
+# [form data]
+#
+# User identifies self via a form.
+#
+# 2. Server -> User Agent
+#
+# HTTP/1.1 200 OK
+# Set-Cookie2: Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"
+#
+# Cookie reflects user's identity.
+
+$cookie = interact($c, 'http://www.acme.com/acme/login',
+ 'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"');
+ok(!$cookie);
+
+#
+# 3. User Agent -> Server
+#
+# POST /acme/pickitem HTTP/1.1
+# Cookie: $Version="1"; Customer="WILE_E_COYOTE"; $Path="/acme"
+# [form data]
+#
+# User selects an item for ``shopping basket.''
+#
+# 4. Server -> User Agent
+#
+# HTTP/1.1 200 OK
+# Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1";
+# Path="/acme"
+#
+# Shopping basket contains an item.
+
+$cookie = interact($c, 'http://www.acme.com/acme/pickitem',
+ 'Part_Number="Rocket_Launcher_0001"; Version="1"; Path="/acme"');
+ok($cookie =~ m(^\$Version="?1"?; Customer="?WILE_E_COYOTE"?; \$Path="/acme"$));
+
+#
+# 5. User Agent -> Server
+#
+# POST /acme/shipping HTTP/1.1
+# Cookie: $Version="1";
+# Customer="WILE_E_COYOTE"; $Path="/acme";
+# Part_Number="Rocket_Launcher_0001"; $Path="/acme"
+# [form data]
+#
+# User selects shipping method from form.
+#
+# 6. Server -> User Agent
+#
+# HTTP/1.1 200 OK
+# Set-Cookie2: Shipping="FedEx"; Version="1"; Path="/acme"
+#
+# New cookie reflects shipping method.
+
+$cookie = interact($c, "http://www.acme.com/acme/shipping",
+ 'Shipping="FedEx"; Version="1"; Path="/acme"');
+
+ok($cookie =~ /^\$Version="?1"?;/);
+ok($cookie =~ /Part_Number="?Rocket_Launcher_0001"?;\s*\$Path="\/acme"/);
+ok($cookie =~ /Customer="?WILE_E_COYOTE"?;\s*\$Path="\/acme"/);
+
+#
+# 7. User Agent -> Server
+#
+# POST /acme/process HTTP/1.1
+# Cookie: $Version="1";
+# Customer="WILE_E_COYOTE"; $Path="/acme";
+# Part_Number="Rocket_Launcher_0001"; $Path="/acme";
+# Shipping="FedEx"; $Path="/acme"
+# [form data]
+#
+# User chooses to process order.
+#
+# 8. Server -> User Agent
+#
+# HTTP/1.1 200 OK
+#
+# Transaction is complete.
+
+$cookie = interact($c, "http://www.acme.com/acme/process");
+print "FINAL COOKIE: $cookie\n";
+ok($cookie =~ /Shipping="?FedEx"?;\s*\$Path="\/acme"/);
+ok($cookie =~ /WILE_E_COYOTE/);
+
+#
+# The user agent makes a series of requests on the origin server, after
+# each of which it receives a new cookie. All the cookies have the same
+# Path attribute and (default) domain. Because the request URLs all have
+# /acme as a prefix, and that matches the Path attribute, each request
+# contains all the cookies received so far.
+
+print $c->as_string;
+
+
+# 5.2 Example 2
+#
+# This example illustrates the effect of the Path attribute. All detail
+# of request and response headers has been omitted. Assume the user agent
+# has no stored cookies.
+
+$c = HTTP::Cookies->new;
+
+# Imagine the user agent has received, in response to earlier requests,
+# the response headers
+#
+# Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1";
+# Path="/acme"
+#
+# and
+#
+# Set-Cookie2: Part_Number="Riding_Rocket_0023"; Version="1";
+# Path="/acme/ammo"
+
+interact($c, "http://www.acme.com/acme/ammo/specific",
+ 'Part_Number="Rocket_Launcher_0001"; Version="1"; Path="/acme"',
+ 'Part_Number="Riding_Rocket_0023"; Version="1"; Path="/acme/ammo"');
+
+# A subsequent request by the user agent to the (same) server for URLs of
+# the form /acme/ammo/... would include the following request header:
+#
+# Cookie: $Version="1";
+# Part_Number="Riding_Rocket_0023"; $Path="/acme/ammo";
+# Part_Number="Rocket_Launcher_0001"; $Path="/acme"
+#
+# Note that the NAME=VALUE pair for the cookie with the more specific Path
+# attribute, /acme/ammo, comes before the one with the less specific Path
+# attribute, /acme. Further note that the same cookie name appears more
+# than once.
+
+$cookie = interact($c, "http://www.acme.com/acme/ammo/...");
+ok($cookie =~ /Riding_Rocket_0023.*Rocket_Launcher_0001/);
+
+# A subsequent request by the user agent to the (same) server for a URL of
+# the form /acme/parts/ would include the following request header:
+#
+# Cookie: $Version="1"; Part_Number="Rocket_Launcher_0001"; $Path="/acme"
+#
+# Here, the second cookie's Path attribute /acme/ammo is not a prefix of
+# the request URL, /acme/parts/, so the cookie does not get forwarded to
+# the server.
+
+$cookie = interact($c, "http://www.acme.com/acme/parts/");
+ok($cookie =~ /Rocket_Launcher_0001/);
+ok($cookie !~ /Riding_Rocket_0023/);
+
+print $c->as_string;
+
+#-----------------------------------------------------------------------
+
+# Test rejection of Set-Cookie2 responses based on domain, path or port
+
+$c = HTTP::Cookies->new;
+
+# illegal domain (no embedded dots)
+$cookie = interact($c, "http://www.acme.com", 'foo=bar; domain=".com"');
+ok(count_cookies($c), 0);
+
+# legal domain
+$cookie = interact($c, "http://www.acme.com", 'foo=bar; domain="acme.com"');
+ok(count_cookies($c), 1);
+
+# illegal domain (host prefix "www.a" contains a dot)
+$cookie = interact($c, "http://www.a.acme.com", 'foo=bar; domain="acme.com"');
+ok(count_cookies($c), 1);
+
+# legal domain
+$cookie = interact($c, "http://www.a.acme.com", 'foo=bar; domain=".a.acme.com"');
+ok(count_cookies($c), 2);
+
+# can't use a IP-address as domain
+$cookie = interact($c, "http://125.125.125.125", 'foo=bar; domain="125.125.125"');
+ok(count_cookies($c), 2);
+
+# illegal path (must be prefix of request path)
+$cookie = interact($c, "http://www.sol.no", 'foo=bar; domain=".sol.no"; path="/foo"');
+ok(count_cookies($c), 2);
+
+# legal path
+$cookie = interact($c, "http://www.sol.no/foo/bar", 'foo=bar; domain=".sol.no"; path="/foo"');
+ok(count_cookies($c), 3);
+
+# illegal port (request-port not in list)
+$cookie = interact($c, "http://www.sol.no", 'foo=bar; domain=".sol.no"; port="90,100"');
+ok(count_cookies($c), 3);
+
+# legal port
+$cookie = interact($c, "http://www.sol.no", 'foo=bar; domain=".sol.no"; port="90,100, 80,8080"; max-age=100; Comment = "Just kidding! (\"|\\\\) "');
+ok(count_cookies($c), 4);
+
+# port attribute without any value (current port)
+$cookie = interact($c, "http://www.sol.no", 'foo9=bar; domain=".sol.no"; port; max-age=100;');
+ok(count_cookies($c), 5);
+
+# encoded path
+$cookie = interact($c, "http://www.sol.no/foo/", 'foo8=bar; path="/%66oo"');
+ok(count_cookies($c), 6);
+
+my $file = "lwp-cookies-$$.txt";
+$c->save($file);
+$old = $c->as_string;
+undef($c);
+
+$c = HTTP::Cookies->new;
+$c->load($file);
+unlink($file) || warn "Can't unlink $file: $!";
+
+ok($old, $c->as_string);
+
+undef($c);
+
+#
+# Try some URL encodings of the PATHs
+#
+$c = HTTP::Cookies->new;
+interact($c, "http://www.acme.com/foo%2f%25/%40%40%0Anew%E5/%E5", 'foo = bar; version = 1');
+print $c->as_string;
+
+$cookie = interact($c, "http://www.acme.com/foo%2f%25/@@%0anewå/æøå", "bar=baz; path=\"/foo/\"; version=1");
+ok($cookie =~ /foo=bar/);
+ok($cookie =~ /^\$version=\"?1\"?/i);
+
+$cookie = interact($c, "http://www.acme.com/foo/%25/@@%0anewå/æøå");
+ok(!$cookie);
+
+undef($c);
+
+#
+# Try to use the Netscape cookie file format for saving
+#
+$file = "cookies-$$.txt";
+$c = HTTP::Cookies::Netscape->new(file => $file);
+interact($c, "http://www.acme.com/", "foo1=bar; max-age=100");
+interact($c, "http://www.acme.com/", "foo2=bar; port=\"80\"; max-age=100; Discard; Version=1");
+interact($c, "http://www.acme.com/", "foo3=bar; secure; Version=1");
+$c->save;
+undef($c);
+
+$c = HTTP::Cookies::Netscape->new(file => $file);
+ok(count_cookies($c), 1); # 2 of them discarded on save
+
+ok($c->as_string =~ /foo1=bar/);
+undef($c);
+unlink($file);
+
+
+#
+# Some additional Netscape cookies test
+#
+$c = HTTP::Cookies->new;
+$req = HTTP::Request->new(POST => "http://foo.bar.acme.com/foo");
+
+# Netscape allows a host part that contains dots
+$res = HTTP::Response->new(200, "OK");
+$res->header(set_cookie => 'Customer=WILE_E_COYOTE; domain=.acme.com');
+$res->request($req);
+$c->extract_cookies($res);
+
+# and that the domain is the same as the host without adding a leading
+# dot to the domain. Should not quote even if strange chars are used
+# in the cookie value.
+$res = HTTP::Response->new(200, "OK");
+$res->header(set_cookie => 'PART_NUMBER=3,4; domain=foo.bar.acme.com');
+$res->request($req);
+$c->extract_cookies($res);
+
+print $c->as_string;
+
+require URI;
+$req = HTTP::Request->new(POST => URI->new("http://foo.bar.acme.com/foo"));
+$c->add_cookie_header($req);
+#print $req->as_string;
+ok($req->header("Cookie") =~ /PART_NUMBER=3,4/);
+ok($req->header("Cookie") =~ /Customer=WILE_E_COYOTE/);
+
+
+# Test handling of local intranet hostnames without a dot
+$c->clear;
+print "---\n";
+
+interact($c, "http://example/", "foo1=bar; PORT; Discard;");
+$_=interact($c, "http://example/", 'foo2=bar; domain=".local"');
+ok(/foo1=bar/);
+
+$_=interact($c, "http://example/", 'foo3=bar');
+$_=interact($c, "http://example/");
+print "Cookie: $_\n";
+ok(/foo2=bar/);
+ok(count_cookies($c), 3);
+print $c->as_string;
+
+# Test for empty path
+# Broken web-server ORION/1.3.38 returns to the client response like
+#
+# Set-Cookie: JSESSIONID=ABCDERANDOM123; Path=
+#
+# e.g. with Path set to nothing.
+# In this case routine extract_cookies() must set cookie to / (root)
+print "---\n";
+print "Test for empty path...\n";
+$c = HTTP::Cookies->new; # clear it
+
+$req = HTTP::Request->new(GET => "http://www.ants.com/");
+
+$res = HTTP::Response->new(200, "OK");
+$res->request($req);
+$res->header("Set-Cookie" => "JSESSIONID=ABCDERANDOM123; Path=");
+print $res->as_string;
+$c->extract_cookies($res);
+#print $c->as_string;
+
+$req = HTTP::Request->new(GET => "http://www.ants.com/");
+$c->add_cookie_header($req);
+#print $req->as_string;
+
+ok($req->header("Cookie"), "JSESSIONID=ABCDERANDOM123");
+ok($req->header("Cookie2"), "\$Version=\"1\"");
+
+
+# missing path in the request URI
+$req = HTTP::Request->new(GET => URI->new("http://www.ants.com:8080"));
+$c->add_cookie_header($req);
+#print $req->as_string;
+
+ok($req->header("Cookie"), "JSESSIONID=ABCDERANDOM123");
+ok($req->header("Cookie2"), "\$Version=\"1\"");
+
+# test mixing of Set-Cookie and Set-Cookie2 headers.
+# Example from http://www.trip.com/trs/trip/flighttracker/flight_tracker_home.xsl
+# which gives up these headers:
+#
+# HTTP/1.1 200 OK
+# Connection: close
+# Date: Fri, 20 Jul 2001 19:54:58 GMT
+# Server: Apache/1.3.19 (Unix) ApacheJServ/1.1.2
+# Content-Type: text/html
+# Content-Type: text/html; charset=iso-8859-1
+# Link: </trip/stylesheet.css>; rel="stylesheet"; type="text/css"
+# Servlet-Engine: Tomcat Web Server/3.2.1 (JSP 1.1; Servlet 2.2; Java 1.3.0; SunOS 5.8 sparc; java.vendor=Sun Microsystems Inc.)
+# Set-Cookie: trip.appServer=1111-0000-x-024;Domain=.trip.com;Path=/
+# Set-Cookie: JSESSIONID=fkumjm7nt1.JS24;Path=/trs
+# Set-Cookie2: JSESSIONID=fkumjm7nt1.JS24;Version=1;Discard;Path="/trs"
+# Title: TRIP.com Travel - FlightTRACKER
+# X-Meta-Description: Trip.com privacy policy
+# X-Meta-Keywords: privacy policy
+
+$req = HTTP::Request->new('GET', 'http://www.trip.com/trs/trip/flighttracker/flight_tracker_home.xsl');
+$res = HTTP::Response->new(200, "OK");
+$res->request($req);
+$res->push_header("Set-Cookie" => qq(trip.appServer=1111-0000-x-024;Domain=.trip.com;Path=/));
+$res->push_header("Set-Cookie" => qq(JSESSIONID=fkumjm7nt1.JS24;Path=/trs));
+$res->push_header("Set-Cookie2" => qq(JSESSIONID=fkumjm7nt1.JS24;Version=1;Discard;Path="/trs"));
+#print $res->as_string;
+
+$c = HTTP::Cookies->new; # clear it
+$c->extract_cookies($res);
+print $c->as_string;
+ok($c->as_string, <<'EOT');
+Set-Cookie3: trip.appServer=1111-0000-x-024; path="/"; domain=.trip.com; path_spec; discard; version=0
+Set-Cookie3: JSESSIONID=fkumjm7nt1.JS24; path="/trs"; domain=www.trip.com; path_spec; discard; version=1
+EOT
+
+#-------------------------------------------------------------------
+# Test if temporary cookies are deleted properly with
+# $jar->clear_temporary_cookies()
+
+$req = HTTP::Request->new('GET', 'http://www.perlmeister.com/scripts');
+$res = HTTP::Response->new(200, "OK");
+$res->request($req);
+ # Set session/perm cookies and mark their values as "session" vs. "perm"
+ # to recognize them later
+$res->push_header("Set-Cookie" => qq(s1=session;Path=/scripts));
+$res->push_header("Set-Cookie" => qq(p1=perm; Domain=.perlmeister.com;Path=/;expires=Fri, 02-Feb-$year_plus_one 23:24:20 GMT));
+$res->push_header("Set-Cookie" => qq(p2=perm;Path=/;expires=Fri, 02-Feb-$year_plus_one 23:24:20 GMT));
+$res->push_header("Set-Cookie" => qq(s2=session;Path=/scripts;Domain=.perlmeister.com));
+$res->push_header("Set-Cookie2" => qq(s3=session;Version=1;Discard;Path="/"));
+
+$c = HTTP::Cookies->new; # clear jar
+$c->extract_cookies($res);
+# How many session/permanent cookies do we have?
+my %counter = ("session_after" => 0);
+$c->scan( sub { $counter{"${_[2]}_before"}++ } );
+$c->clear_temporary_cookies();
+# How many now?
+$c->scan( sub { $counter{"${_[2]}_after"}++ } );
+ok($counter{"perm_after"}, $counter{"perm_before"}); # a permanent cookie got lost accidently
+ok($counter{"session_after"}, 0); # a session cookie hasn't been cleared
+ok($counter{"session_before"}, 3); # we didn't have session cookies in the first place
+#print $c->as_string;
+
+
+# Test handling of 'secure ' attribute for classic cookies
+$c = HTTP::Cookies->new;
+$req = HTTP::Request->new(GET => "https://1.1.1.1/");
+$req->header("Host", "www.acme.com:80");
+
+$res = HTTP::Response->new(200, "OK");
+$res->request($req);
+$res->header("Set-Cookie" => "CUSTOMER=WILE_E_COYOTE ; secure ; path=/");
+#print $res->as_string;
+$c->extract_cookies($res);
+
+$req = HTTP::Request->new(GET => "http://www.acme.com/");
+$c->add_cookie_header($req);
+
+ok(!$req->header("Cookie"));
+
+$req->uri->scheme("https");
+$c->add_cookie_header($req);
+
+ok($req->header("Cookie"), "CUSTOMER=WILE_E_COYOTE");
+
+#print $req->as_string;
+#print $c->as_string;
+
+
+$req = HTTP::Request->new(GET => "ftp://ftp.activestate.com/");
+$c->add_cookie_header($req);
+ok(!$req->header("Cookie"));
+
+$req = HTTP::Request->new(GET => "file:/etc/motd");
+$c->add_cookie_header($req);
+ok(!$req->header("Cookie"));
+
+$req = HTTP::Request->new(GET => "mailto:gisle\@aas.no");
+$c->add_cookie_header($req);
+ok(!$req->header("Cookie"));
+
+
+# Test cookie called 'exipres' <https://rt.cpan.org/Ticket/Display.html?id=8108>
+$c = HTTP::Cookies->new;
+$req = HTTP::Request->new("GET" => "http://example.com");
+$res = HTTP::Response->new(200, "OK");
+$res->request($req);
+$res->header("Set-Cookie" => "Expires=10101");
+$c->extract_cookies($res);
+#print $c->as_string;
+ok($c->as_string, <<'EOT');
+Set-Cookie3: Expires=10101; path="/"; domain=example.com; discard; version=0
+EOT
+
+# Test empty cookie header [RT#29401]
+$c = HTTP::Cookies->new;
+$res->header("Set-Cookie" => ["CUSTOMER=WILE_E_COYOTE; path=/;", ""]);
+#print $res->as_string;
+$c->extract_cookies($res);
+#print $c->as_string;
+ok($c->as_string, <<'EOT');
+Set-Cookie3: CUSTOMER=WILE_E_COYOTE; path="/"; domain=example.com; path_spec; discard; version=0
+EOT
+
+# Test empty cookie part [RT#38480]
+$c = HTTP::Cookies->new;
+$res->header("Set-Cookie" => "CUSTOMER=WILE_E_COYOTE;;path=/;");
+#print $res->as_string;
+$c->extract_cookies($res);
+#print $c->as_string;
+ok($c->as_string, <<'EOT');
+Set-Cookie3: CUSTOMER=WILE_E_COYOTE; path="/"; domain=example.com; path_spec; discard; version=0
+EOT
+
+# Test Set-Cookie with version set
+$c = HTTP::Cookies->new;
+$res->header("Set-Cookie" => "foo=\"bar\";version=1");
+#print $res->as_string;
+$c->extract_cookies($res);
+#print $c->as_string;
+$req = HTTP::Request->new(GET => "http://www.example.com/foo");
+$c->add_cookie_header($req);
+#print $req->as_string;
+ok($req->header("Cookie"), "foo=\"bar\"");
+
+# Test cookies that expire far into the future [RT#50147]
+$c = HTTP::Cookies->new;
+$res->header("Set-Cookie", "PREF=ID=cee18f7c4e977184:TM=1254583090:LM=1254583090:S=Pdb0-hy9PxrNj4LL; expires=Mon, 03-Oct-2211 15:18:10 GMT; path=/; domain=.example.com");
+$res->push_header("Set-Cookie", "expired1=1; expires=Mon, 03-Oct-2001 15:18:10 GMT; path=/; domain=.example.com");
+$res->push_header("Set-Cookie", "expired2=1; expires=Fri Jan 1 00:00:00 GMT 1970; path=/; domain=.example.com");
+$res->push_header("Set-Cookie", "expired3=1; expires=Fri Jan 1 00:00:01 GMT 1970; path=/; domain=.example.com");
+$res->push_header("Set-Cookie", "expired4=1; expires=Thu Dec 31 23:59:59 GMT 1969; path=/; domain=.example.com");
+$res->push_header("Set-Cookie", "expired5=1; expires=Fri Feb 2 00:00:00 GMT 1950; path=/; domain=.example.com");
+$c->extract_cookies($res);
+#print $res->as_string;
+#print "---\n";
+#print $c->as_string;
+$req = HTTP::Request->new(GET => "http://www.example.com/foo");
+$c->add_cookie_header($req);
+#print $req->as_string;
+ok($req->header("Cookie"), "PREF=ID=cee18f7c4e977184:TM=1254583090:LM=1254583090:S=Pdb0-hy9PxrNj4LL");
+
+$c->clear_temporary_cookies;
+$req = HTTP::Request->new(GET => "http://www.example.com/foo");
+$c->add_cookie_header($req);
+#print $req->as_string;
+ok($req->header("Cookie"), "PREF=ID=cee18f7c4e977184:TM=1254583090:LM=1254583090:S=Pdb0-hy9PxrNj4LL");
+
+# Test merging of cookies
+$c = HTTP::Cookies->new;
+$res->header("Set-Cookie", "foo=1; path=/");
+$c->extract_cookies($res);
+
+$req = HTTP::Request->new(GET => "http://www.example.com/foo");
+$req->header("Cookie", "x=bcd");
+$c->add_cookie_header($req);
+ok($req->header("Cookie"), "x=bcd; foo=1");
+$c->add_cookie_header($req);
+ok($req->header("Cookie"), "x=bcd; foo=1; foo=1");
+#print $req->as_string;
+
+
+#-------------------------------------------------------------------
+
+sub interact
+{
+ my $c = shift;
+ my $url = shift;
+ my $req = HTTP::Request->new(POST => $url);
+ $c->add_cookie_header($req);
+ my $cookie = $req->header("Cookie");
+ my $res = HTTP::Response->new(200, "OK");
+ $res->request($req);
+ for (@_) { $res->push_header("Set-Cookie2" => $_) }
+ $c->extract_cookies($res);
+ return $cookie;
+}
+
+sub count_cookies
+{
+ my $c = shift;
+ my $no = 0;
+ $c->scan(sub { $no++ });
+ $no;
+}
--- /dev/null
+#!perl -w
+
+use strict;
+use Test;
+
+plan tests => 133;
+
+use HTTP::Date;
+
+require Time::Local if $^O eq "MacOS";
+my $offset = ($^O eq "MacOS") ? Time::Local::timegm(0,0,0,1,0,70) : 0;
+
+# test str2time for supported dates. Test cases with 2 digit year
+# will probably break in year 2044.
+my(@tests) =
+(
+ 'Thu Feb 3 00:00:00 GMT 1994', # ctime format
+ 'Thu Feb 3 00:00:00 1994', # same as ctime, except no TZ
+
+ 'Thu, 03 Feb 1994 00:00:00 GMT', # proposed new HTTP format
+ 'Thursday, 03-Feb-94 00:00:00 GMT', # old rfc850 HTTP format
+ 'Thursday, 03-Feb-1994 00:00:00 GMT', # broken rfc850 HTTP format
+
+ '03/Feb/1994:00:00:00 0000', # common logfile format
+ '03/Feb/1994:01:00:00 +0100', # common logfile format
+ '02/Feb/1994:23:00:00 -0100', # common logfile format
+
+ '03 Feb 1994 00:00:00 GMT', # HTTP format (no weekday)
+ '03-Feb-94 00:00:00 GMT', # old rfc850 (no weekday)
+ '03-Feb-1994 00:00:00 GMT', # broken rfc850 (no weekday)
+ '03-Feb-1994 00:00 GMT', # broken rfc850 (no weekday, no seconds)
+ '03-Feb-1994 00:00', # VMS dir listing format
+
+ '03-Feb-94', # old rfc850 HTTP format (no weekday, no time)
+ '03-Feb-1994', # broken rfc850 HTTP format (no weekday, no time)
+ '03 Feb 1994', # proposed new HTTP format (no weekday, no time)
+ '03/Feb/1994', # common logfile format (no time, no offset)
+
+ #'Feb 3 00:00', # Unix 'ls -l' format (can't really test it here)
+ 'Feb 3 1994', # Unix 'ls -l' format
+
+ "02-03-94 12:00AM", # Windows 'dir' format
+
+ # ISO 8601 formats
+ '1994-02-03 00:00:00 +0000',
+ '1994-02-03',
+ '19940203',
+ '1994-02-03T00:00:00+0000',
+ '1994-02-02T23:00:00-0100',
+ '1994-02-02T23:00:00-01:00',
+ '1994-02-03T00:00:00 Z',
+ '19940203T000000Z',
+ '199402030000',
+
+ # A few tests with extra space at various places
+ ' 03/Feb/1994 ',
+ ' 03 Feb 1994 0:00 ',
+);
+
+my $time = (760233600 + $offset); # assume broken POSIX counting of seconds
+for (@tests) {
+ my $t;
+ if (/GMT/i) {
+ $t = str2time($_);
+ }
+ else {
+ $t = str2time($_, "GMT");
+ }
+ my $t2 = str2time(lc($_), "GMT");
+ my $t3 = str2time(uc($_), "GMT");
+
+ print "\n# '$_'\n";
+
+ ok($t, $time);
+ ok($t2, $time);
+ ok($t3, $time);
+}
+
+# test time2str
+ok(time2str($time), 'Thu, 03 Feb 1994 00:00:00 GMT');
+
+# test the 'ls -l' format with missing year$
+# round to nearest minute 3 days ago.
+$time = int((time - 3 * 24*60*60) /60)*60;
+my ($min, $hr, $mday, $mon) = (localtime $time)[1,2,3,4];
+$mon = (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$mon];
+my $str = sprintf("$mon %02d %02d:%02d", $mday, $hr, $min);
+my $t = str2time($str);
+ok($t, $time);
+
+# try some garbage.
+for (undef, '', 'Garbage',
+ 'Mandag 16. September 1996',
+ '12 Arp 2003',
+# 'Thu Feb 3 00:00:00 CET 1994',
+# 'Thu, 03 Feb 1994 00:00:00 CET',
+# 'Wednesday, 31-Dec-69 23:59:59 GMT',
+
+ '1980-00-01',
+ '1980-13-01',
+ '1980-01-00',
+ '1980-01-32',
+ '1980-01-01 25:00:00',
+ '1980-01-01 00:61:00',
+ '1980-01-01 00:00:61',
+ )
+{
+ my $bad = 0;
+ eval {
+ if (defined str2time $_) {
+ print "str2time($_) is not undefined\n";
+ $bad++;
+ }
+ };
+ print defined($_) ? "\n# '$_'\n" : "\n# undef\n";
+ ok(!$@);
+ ok(!$bad);
+}
+
+print "Testing AM/PM gruff...\n";
+
+# Test the str2iso routines
+use HTTP::Date qw(time2iso time2isoz);
+
+print "Testing time2iso functions\n";
+
+$t = time2iso(str2time("11-12-96 0:00AM"));
+ok($t, "1996-11-12 00:00:00");
+
+$t = time2iso(str2time("11-12-96 12:00AM"));
+ok($t, "1996-11-12 00:00:00");
+
+$t = time2iso(str2time("11-12-96 0:00PM"));
+ok($t, "1996-11-12 12:00:00");
+
+$t = time2iso(str2time("11-12-96 12:00PM"));
+ok($t, "1996-11-12 12:00:00");
+
+
+$t = time2iso(str2time("11-12-96 1:05AM"));
+ok($t, "1996-11-12 01:05:00");
+
+$t = time2iso(str2time("11-12-96 12:05AM"));
+ok($t, "1996-11-12 00:05:00");
+
+$t = time2iso(str2time("11-12-96 1:05PM"));
+ok($t, "1996-11-12 13:05:00");
+
+$t = time2iso(str2time("11-12-96 12:05PM"));
+ok($t, "1996-11-12 12:05:00");
+
+$t = str2time("2000-01-01 00:00:01.234");
+print "FRAC $t = ", time2iso($t), "\n";
+ok(abs(($t - int($t)) - 0.234) < 0.000001);
+
+$a = time2iso;
+$b = time2iso(500000);
+print "LOCAL $a $b\n";
+my $az = time2isoz;
+my $bz = time2isoz(500000);
+print "GMT $az $bz\n";
+
+for ($a, $b) { ok(/^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d$/); }
+for ($az, $bz) { ok(/^\d{4}-\d\d-\d\d \d\d:\d\d:\d\dZ$/); }
+
+# Test the parse_date interface
+use HTTP::Date qw(parse_date);
+
+my @d = parse_date("Jan 1 2001");
+
+ok(!defined(pop(@d)));
+ok("@d", "2001 1 1 0 0 0");
+
+# This test will break around year 2070
+ok(parse_date("03-Feb-20"), "2020-02-03 00:00:00");
+
+# This test will break around year 2048
+ok(parse_date("03-Feb-98"), "1998-02-03 00:00:00");
+
+print "HTTP::Date $HTTP::Date::VERSION\n";
--- /dev/null
+#!perl -w
+
+use strict;
+use Test;
+
+plan tests => 6;
+
+use HTTP::Response;
+use HTTP::Headers::Auth;
+
+my $res = HTTP::Response->new(401);
+$res->push_header(WWW_Authenticate => qq(Foo realm="WallyWorld", foo=bar, Bar realm="WallyWorld2"));
+$res->push_header(WWW_Authenticate => qq(Basic Realm="WallyWorld", foo=bar, bar=baz));
+
+print $res->as_string;
+
+my %auth = $res->www_authenticate;
+
+ok(keys(%auth), 3);
+
+ok($auth{basic}{realm}, "WallyWorld");
+ok($auth{bar}{realm}, "WallyWorld2");
+
+$a = $res->www_authenticate;
+ok($a, 'Foo realm="WallyWorld", foo=bar, Bar realm="WallyWorld2", Basic Realm="WallyWorld", foo=bar, bar=baz');
+
+$res->www_authenticate("Basic realm=foo1");
+print $res->as_string;
+
+$res->www_authenticate(Basic => {realm => "foo2"});
+print $res->as_string;
+
+$res->www_authenticate(Basic => [realm => "foo3", foo=>33],
+ Digest => {nonce=>"bar", foo=>'foo'});
+print $res->as_string;
+
+$_ = $res->as_string;
+
+ok(/WWW-Authenticate: Basic realm="foo3", foo=33/);
+ok(/WWW-Authenticate: Digest nonce=bar, foo=foo/ ||
+ /WWW-Authenticate: Digest foo=foo, nonce=bar/);
+
--- /dev/null
+#!perl -w
+
+use strict;
+use Test;
+
+plan tests => 4;
+
+require HTTP::Headers::ETag;
+
+my $h = HTTP::Headers->new;
+
+$h->etag("tag1");
+ok($h->etag, qq("tag1"));
+
+$h->etag("w/tag2");
+ok($h->etag, qq(W/"tag2"));
+
+$h->if_match(qq(W/"foo", bar, baz), "bar");
+$h->if_none_match(333);
+
+$h->if_range("tag3");
+ok($h->if_range, qq("tag3"));
+
+my $t = time;
+$h->if_range($t);
+ok($h->if_range, $t);
+
+print $h->as_string;
+
--- /dev/null
+#!perl -w
+
+use strict;
+use Test;
+
+use HTTP::Headers::Util qw(split_header_words join_header_words);
+
+my @s_tests = (
+
+ ["foo" => "foo"],
+ ["foo=bar" => "foo=bar"],
+ [" foo " => "foo"],
+ ["foo=" => 'foo=""'],
+ ["foo=bar bar=baz" => "foo=bar; bar=baz"],
+ ["foo=bar;bar=baz" => "foo=bar; bar=baz"],
+ ['foo bar baz' => "foo; bar; baz"],
+ ['foo="\"" bar="\\\\"' => 'foo="\""; bar="\\\\"'],
+ ['foo,,,bar' => 'foo, bar'],
+ ['foo=bar,bar=baz' => 'foo=bar, bar=baz'],
+
+ ['TEXT/HTML; CHARSET=ISO-8859-1' =>
+ 'text/html; charset=ISO-8859-1'],
+
+ ['foo="bar"; port="80,81"; discard, bar=baz' =>
+ 'foo=bar; port="80,81"; discard, bar=baz'],
+
+ ['Basic realm="\"foo\\\\bar\""' =>
+ 'basic; realm="\"foo\\\\bar\""'],
+);
+
+plan tests => @s_tests + 2;
+
+for (@s_tests) {
+ my($arg, $expect) = @$_;
+ my @arg = ref($arg) ? @$arg : $arg;
+
+ my $res = join_header_words(split_header_words(@arg));
+ ok($res, $expect);
+}
+
+
+print "# Extra tests\n";
+# some extra tests
+ok(join_header_words("foo" => undef, "bar" => "baz"), "foo; bar=baz");
+ok(join_header_words(), "");
--- /dev/null
+#!perl -w
+
+use strict;
+use Test qw(plan ok);
+
+plan tests => 164;
+
+my($h, $h2);
+sub j { join("|", @_) }
+
+
+require HTTP::Headers;
+$h = HTTP::Headers->new;
+ok($h);
+ok(ref($h), "HTTP::Headers");
+ok($h->as_string, "");
+
+$h = HTTP::Headers->new(foo => "bar", foo => "baaaaz", Foo => "baz");
+ok($h->as_string, "Foo: bar\nFoo: baaaaz\nFoo: baz\n");
+
+$h = HTTP::Headers->new(foo => ["bar", "baz"]);
+ok($h->as_string, "Foo: bar\nFoo: baz\n");
+
+$h = HTTP::Headers->new(foo => 1, bar => 2, foo_bar => 3);
+ok($h->as_string, "Bar: 2\nFoo: 1\nFoo-Bar: 3\n");
+ok($h->as_string(";"), "Bar: 2;Foo: 1;Foo-Bar: 3;");
+
+ok($h->header("Foo"), 1);
+ok($h->header("FOO"), 1);
+ok(j($h->header("foo")), 1);
+ok($h->header("foo-bar"), 3);
+ok($h->header("foo_bar"), 3);
+ok($h->header("Not-There"), undef);
+ok(j($h->header("Not-There")), "");
+ok(eval { $h->header }, undef);
+ok($@);
+
+ok($h->header("Foo", 11), 1);
+ok($h->header("Foo", [1, 1]), 11);
+ok($h->header("Foo"), "1, 1");
+ok(j($h->header("Foo")), "1|1");
+ok($h->header(foo => 11, Foo => 12, bar => 22), 2);
+ok($h->header("Foo"), "11, 12");
+ok($h->header("Bar"), 22);
+ok($h->header("Bar", undef), 22);
+ok(j($h->header("bar", 22)), "");
+
+$h->push_header(Bar => 22);
+ok($h->header("Bar"), "22, 22");
+$h->push_header(Bar => [23 .. 25]);
+ok($h->header("Bar"), "22, 22, 23, 24, 25");
+ok(j($h->header("Bar")), "22|22|23|24|25");
+
+$h->clear;
+$h->header(Foo => 1);
+ok($h->as_string, "Foo: 1\n");
+$h->init_header(Foo => 2);
+$h->init_header(Bar => 2);
+ok($h->as_string, "Bar: 2\nFoo: 1\n");
+$h->init_header(Foo => [2, 3]);
+$h->init_header(Baz => [2, 3]);
+ok($h->as_string, "Bar: 2\nBaz: 2\nBaz: 3\nFoo: 1\n");
+
+eval { $h->init_header(A => 1, B => 2, C => 3) };
+ok($@);
+ok($h->as_string, "Bar: 2\nBaz: 2\nBaz: 3\nFoo: 1\n");
+
+ok($h->clone->remove_header("Foo"), 1);
+ok($h->clone->remove_header("Bar"), 1);
+ok($h->clone->remove_header("Baz"), 2);
+ok($h->clone->remove_header(qw(Foo Bar Baz Not-There)), 4);
+ok($h->clone->remove_header("Not-There"), 0);
+ok(j($h->clone->remove_header("Foo")), 1);
+ok(j($h->clone->remove_header("Bar")), 2);
+ok(j($h->clone->remove_header("Baz")), "2|3");
+ok(j($h->clone->remove_header(qw(Foo Bar Baz Not-There))), "1|2|2|3");
+ok(j($h->clone->remove_header("Not-There")), "");
+
+$h = HTTP::Headers->new(
+ allow => "GET",
+ content => "none",
+ content_type => "text/html",
+ content_md5 => "dummy",
+ content_encoding => "gzip",
+ content_foo => "bar",
+ last_modified => "yesterday",
+ expires => "tomorrow",
+ etag => "abc",
+ date => "today",
+ user_agent => "libwww-perl",
+ zoo => "foo",
+ );
+ok($h->as_string, <<EOT);
+Date: today
+User-Agent: libwww-perl
+ETag: abc
+Allow: GET
+Content-Encoding: gzip
+Content-MD5: dummy
+Content-Type: text/html
+Expires: tomorrow
+Last-Modified: yesterday
+Content: none
+Content-Foo: bar
+Zoo: foo
+EOT
+
+$h2 = $h->clone;
+ok($h->as_string, $h2->as_string);
+
+ok($h->remove_content_headers->as_string, <<EOT);
+Allow: GET
+Content-Encoding: gzip
+Content-MD5: dummy
+Content-Type: text/html
+Expires: tomorrow
+Last-Modified: yesterday
+Content-Foo: bar
+EOT
+
+ok($h->as_string, <<EOT);
+Date: today
+User-Agent: libwww-perl
+ETag: abc
+Content: none
+Zoo: foo
+EOT
+
+# separate code path for the void context case, so test it as well
+$h2->remove_content_headers;
+ok($h->as_string, $h2->as_string);
+
+$h->clear;
+ok($h->as_string, "");
+undef($h2);
+
+$h = HTTP::Headers->new;
+ok($h->header_field_names, 0);
+ok(j($h->header_field_names), "");
+
+$h = HTTP::Headers->new( etag => 1, foo => [2,3],
+ content_type => "text/plain");
+ok($h->header_field_names, 3);
+ok(j($h->header_field_names), "ETag|Content-Type|Foo");
+
+{
+ my @tmp;
+ $h->scan(sub { push(@tmp, @_) });
+ ok(j(@tmp), "ETag|1|Content-Type|text/plain|Foo|2|Foo|3");
+
+ @tmp = ();
+ eval { $h->scan(sub { push(@tmp, @_); die if $_[0] eq "Content-Type" }) };
+ ok($@);
+ ok(j(@tmp), "ETag|1|Content-Type|text/plain");
+
+ @tmp = ();
+ $h->scan(sub { push(@tmp, @_) });
+ ok(j(@tmp), "ETag|1|Content-Type|text/plain|Foo|2|Foo|3");
+}
+
+# CONVENIENCE METHODS
+
+$h = HTTP::Headers->new;
+ok($h->date, undef);
+ok($h->date(time), undef);
+ok(j($h->header_field_names), "Date");
+ok($h->header("Date") =~ /^[A-Z][a-z][a-z], \d\d .* GMT$/);
+{
+ my $off = time - $h->date;
+ ok($off == 0 || $off == 1);
+}
+
+if ($] < 5.006) {
+ Test::skip("Can't call variable method", 1) for 1..13;
+}
+else {
+# other date fields
+for my $field (qw(expires if_modified_since if_unmodified_since
+ last_modified))
+{
+ eval <<'EOT'; die $@ if $@;
+ ok($h->$field, undef);
+ ok($h->$field(time), undef);
+ ok((time - $h->$field) =~ /^[01]$/);
+EOT
+}
+ok(j($h->header_field_names), "Date|If-Modified-Since|If-Unmodified-Since|Expires|Last-Modified");
+}
+
+$h->clear;
+ok($h->content_type, "");
+ok($h->content_type("text/html"), "");
+ok($h->content_type, "text/html");
+ok($h->content_type(" TEXT / HTML ") , "text/html");
+ok($h->content_type, "text/html");
+ok(j($h->content_type), "text/html");
+ok($h->content_type("text/html;\n charSet = \"ISO-8859-1\"; Foo=1 "), "text/html");
+ok($h->content_type, "text/html");
+ok(j($h->content_type), "text/html|charSet = \"ISO-8859-1\"; Foo=1 ");
+ok($h->header("content_type"), "text/html;\n charSet = \"ISO-8859-1\"; Foo=1 ");
+ok($h->content_is_html);
+ok(!$h->content_is_xhtml);
+ok(!$h->content_is_xml);
+$h->content_type("application/xhtml+xml");
+ok($h->content_is_html);
+ok($h->content_is_xhtml);
+ok($h->content_is_xml);
+ok($h->content_type("text/html;\n charSet = \"ISO-8859-1\"; Foo=1 "), "application/xhtml+xml");
+
+ok($h->content_encoding, undef);
+ok($h->content_encoding("gzip"), undef);
+ok($h->content_encoding, "gzip");
+ok(j($h->header_field_names), "Content-Encoding|Content-Type");
+
+ok($h->content_language, undef);
+ok($h->content_language("no"), undef);
+ok($h->content_language, "no");
+
+ok($h->title, undef);
+ok($h->title("This is a test"), undef);
+ok($h->title, "This is a test");
+
+ok($h->user_agent, undef);
+ok($h->user_agent("Mozilla/1.2"), undef);
+ok($h->user_agent, "Mozilla/1.2");
+
+ok($h->server, undef);
+ok($h->server("Apache/2.1"), undef);
+ok($h->server, "Apache/2.1");
+
+ok($h->from("Gisle\@ActiveState.com"), undef);
+ok($h->header("from", "Gisle\@ActiveState.com"));
+
+ok($h->referer("http://www.example.com"), undef);
+ok($h->referer, "http://www.example.com");
+ok($h->referrer, "http://www.example.com");
+ok($h->referer("http://www.example.com/#bar"), "http://www.example.com");
+ok($h->referer, "http://www.example.com/");
+{
+ require URI;
+ my $u = URI->new("http://www.example.com#bar");
+ $h->referer($u);
+ ok($u->as_string, "http://www.example.com#bar");
+ ok($h->referer->fragment, undef);
+ ok($h->referrer->as_string, "http://www.example.com");
+}
+
+ok($h->as_string, <<EOT);
+From: Gisle\@ActiveState.com
+Referer: http://www.example.com
+User-Agent: Mozilla/1.2
+Server: Apache/2.1
+Content-Encoding: gzip
+Content-Language: no
+Content-Type: text/html;
+ charSet = "ISO-8859-1"; Foo=1
+Title: This is a test
+EOT
+
+$h->clear;
+ok($h->www_authenticate("foo"), undef);
+ok($h->www_authenticate("bar"), "foo");
+ok($h->www_authenticate, "bar");
+ok($h->proxy_authenticate("foo"), undef);
+ok($h->proxy_authenticate("bar"), "foo");
+ok($h->proxy_authenticate, "bar");
+
+ok($h->authorization_basic, undef);
+ok($h->authorization_basic("u"), undef);
+ok($h->authorization_basic("u", "p"), "u:");
+ok($h->authorization_basic, "u:p");
+ok(j($h->authorization_basic), "u|p");
+ok($h->authorization, "Basic dTpw");
+
+ok(eval { $h->authorization_basic("u2:p") }, undef);
+ok($@);
+ok(j($h->authorization_basic), "u|p");
+
+ok($h->proxy_authorization_basic("u2", "p2"), undef);
+ok(j($h->proxy_authorization_basic), "u2|p2");
+ok($h->proxy_authorization, "Basic dTI6cDI=");
+
+ok($h->as_string, <<EOT);
+Authorization: Basic dTpw
+Proxy-Authorization: Basic dTI6cDI=
+Proxy-Authenticate: bar
+WWW-Authenticate: bar
+EOT
+
+
+
+#---- old tests below -----
+
+$h = new HTTP::Headers
+ mime_version => "1.0",
+ content_type => "text/html";
+$h->header(URI => "http://www.oslonett.no/");
+
+ok($h->header("MIME-Version"), "1.0");
+ok($h->header('Uri'), "http://www.oslonett.no/");
+
+$h->header("MY-header" => "foo",
+ "Date" => "somedate",
+ "Accept" => ["text/plain", "image/*"],
+ );
+$h->push_header("accept" => "audio/basic");
+
+ok($h->header("date"), "somedate");
+
+my @accept = $h->header("accept");
+ok(@accept, 3);
+
+$h->remove_header("uri", "date");
+
+my $str = $h->as_string;
+my $lines = ($str =~ tr/\n/\n/);
+ok($lines, 6);
+
+$h2 = $h->clone;
+
+$h->header("accept", "*/*");
+$h->remove_header("my-header");
+
+@accept = $h2->header("accept");
+ok(@accept, 3);
+
+@accept = $h->header("accept");
+ok(@accept, 1);
+
+# Check order of headers, but first remove this one
+$h2->remove_header('mime_version');
+
+# and add this general header
+$h2->header(Connection => 'close');
+
+my @x = ();
+$h2->scan(sub {push(@x, shift);});
+ok(join(";", @x), "Connection;Accept;Accept;Accept;Content-Type;MY-Header");
+
+# Check headers with embedded newlines:
+$h = HTTP::Headers->new(
+ a => "foo\n\n",
+ b => "foo\nbar",
+ c => "foo\n\nbar\n\n",
+ d => "foo\n\tbar",
+ e => "foo\n bar ",
+ f => "foo\n bar\n baz\nbaz",
+ );
+ok($h->as_string("<<\n"), <<EOT);
+A: foo<<
+B: foo<<
+ bar<<
+C: foo<<
+ bar<<
+D: foo<<
+\tbar<<
+E: foo<<
+ bar<<
+F: foo<<
+ bar<<
+ baz<<
+ baz<<
+EOT
+
+# Check for attempt to send a body
+$h = HTTP::Headers->new(
+ a => "foo\r\n\r\nevil body" ,
+ b => "foo\015\012\015\012evil body" ,
+ c => "foo\x0d\x0a\x0d\x0aevil body" ,
+);
+ok (
+ $h->as_string(),
+ "A: foo\r\n evil body\n".
+ "B: foo\015\012 evil body\n" .
+ "C: foo\x0d\x0a evil body\n" ,
+ "embedded CRLF are stripped out");
+
+# Check with FALSE $HTML::Headers::TRANSLATE_UNDERSCORE
+{
+ local($HTTP::Headers::TRANSLATE_UNDERSCORE);
+ $HTTP::Headers::TRANSLATE_UNDERSCORE = undef; # avoid -w warning
+
+ $h = HTTP::Headers->new;
+ $h->header(abc_abc => "foo");
+ $h->header("abc-abc" => "bar");
+
+ ok($h->header("ABC_ABC"), "foo");
+ ok($h->header("ABC-ABC"),"bar");
+ ok($h->remove_header("Abc_Abc"));
+ ok(!defined($h->header("abc_abc")));
+ ok($h->header("ABC-ABC"), "bar");
+}
+
+# Check if objects as header values works
+require URI;
+$h->header(URI => URI->new("http://www.perl.org"));
+
+ok($h->header("URI")->scheme, "http");
+
+$h->clear;
+ok($h->as_string, "");
+
+$h->content_type("text/plain");
+$h->header(content_md5 => "dummy");
+$h->header("Content-Foo" => "foo");
+$h->header(Location => "http:", xyzzy => "plugh!");
+
+ok($h->as_string, <<EOT);
+Location: http:
+Content-MD5: dummy
+Content-Type: text/plain
+Content-Foo: foo
+Xyzzy: plugh!
+EOT
+
+my $c = $h->remove_content_headers;
+ok($h->as_string, <<EOT);
+Location: http:
+Xyzzy: plugh!
+EOT
+
+ok($c->as_string, <<EOT);
+Content-MD5: dummy
+Content-Type: text/plain
+Content-Foo: foo
+EOT
+
+$h = HTTP::Headers->new;
+$h->content_type("text/plain");
+$h->header(":foo_bar", 1);
+$h->push_header(":content_type", "text/html");
+ok(j($h->header_field_names), "Content-Type|:content_type|:foo_bar");
+ok($h->header('Content-Type'), "text/plain");
+ok($h->header(':Content_Type'), undef);
+ok($h->header(':content_type'), "text/html");
+ok($h->as_string, <<EOT);
+Content-Type: text/plain
+content_type: text/html
+foo_bar: 1
+EOT
+
+# [RT#30579] IE6 appens "; length = NNNN" on If-Modified-Since (can we handle it)
+$h = HTTP::Headers->new(
+ if_modified_since => "Sat, 29 Oct 1994 19:43:31 GMT; length=34343"
+);
+ok(gmtime($h->if_modified_since), "Sat Oct 29 19:43:31 1994");
--- /dev/null
+#!perl -w
+
+use strict;
+use Test;
+plan tests => 14;
+
+use HTTP::Config;
+
+sub j { join("|", @_) }
+
+my $conf = HTTP::Config->new;
+ok($conf->empty);
+$conf->add_item(42);
+ok(!$conf->empty);
+ok(j($conf->matching_items("http://www.example.com/foo")), 42);
+ok(j($conf->remove_items), 42);
+ok($conf->matching_items("http://www.example.com/foo"), 0);
+
+$conf = HTTP::Config->new;
+
+$conf->add_item("always");
+$conf->add_item("GET", m_method => ["GET", "HEAD"]);
+$conf->add_item("POST", m_method => "POST");
+$conf->add_item(".com", m_domain => ".com");
+$conf->add_item("secure", m_secure => 1);
+$conf->add_item("not secure", m_secure => 0);
+$conf->add_item("slash", m_host_port => "www.example.com:80", m_path_prefix => "/");
+$conf->add_item("u:p", m_host_port => "www.example.com:80", m_path_prefix => "/foo");
+$conf->add_item("success", m_code => "2xx");
+
+use HTTP::Request;
+my $request = HTTP::Request->new(HEAD => "http://www.example.com/foo/bar");
+$request->header("User-Agent" => "Moz/1.0");
+
+ok(j($conf->matching_items($request)), "u:p|slash|.com|GET|not secure|always");
+
+$request->method("HEAD");
+$request->uri->scheme("https");
+
+ok(j($conf->matching_items($request)), ".com|GET|secure|always");
+
+ok(j($conf->matching_items("http://activestate.com")), ".com|not secure|always");
+
+use HTTP::Response;
+my $response = HTTP::Response->new(200 => "OK");
+$response->content_type("text/plain");
+$response->content("Hello, world!\n");
+$response->request($request);
+
+ok(j($conf->matching_items($response)), ".com|success|GET|secure|always");
+
+$conf->remove_items(m_secure => 1);
+$conf->remove_items(m_domain => ".com");
+ok(j($conf->matching_items($response)), "success|GET|always");
+
+$conf->remove_items; # start fresh
+ok(j($conf->matching_items($response)), "");
+
+$conf->add_item("any", "m_media_type" => "*/*");
+$conf->add_item("text", m_media_type => "text/*");
+$conf->add_item("html", m_media_type => "html");
+$conf->add_item("HTML", m_media_type => "text/html");
+$conf->add_item("xhtml", m_media_type => "xhtml");
+
+ok(j($conf->matching_items($response)), "text|any");
+
+$response->content_type("application/xhtml+xml");
+ok(j($conf->matching_items($response)), "xhtml|html|any");
+
+$response->content_type("text/html");
+ok(j($conf->matching_items($response)), "HTML|html|text|any");
--- /dev/null
+#!perl -w
+
+use strict;
+use Test;
+
+plan tests => 34;
+#use Data::Dump ();
+
+my $CRLF = "\015\012";
+my $LF = "\012";
+
+{
+ package HTTP;
+ use vars qw(@ISA);
+ require Net::HTTP::Methods;
+ @ISA=qw(Net::HTTP::Methods);
+
+ my %servers = (
+ a => { "/" => "HTTP/1.0 200 OK${CRLF}Content-Type: text/plain${CRLF}Content-Length: 6${CRLF}${CRLF}Hello\n",
+ "/bad1" => "HTTP/1.0 200 OK${LF}Server: foo${LF}HTTP/1.0 200 OK${LF}Content-type: text/foo${LF}${LF}abc\n",
+ "/09" => "Hello${CRLF}World!${CRLF}",
+ "/chunked" => "HTTP/1.1 200 OK${CRLF}Transfer-Encoding: chunked${CRLF}${CRLF}0002; foo=3; bar${CRLF}He${CRLF}1${CRLF}l${CRLF}2${CRLF}lo${CRLF}0000${CRLF}Content-MD5: xxx${CRLF}${CRLF}",
+ "/head" => "HTTP/1.1 200 OK${CRLF}Content-Length: 16${CRLF}Content-Type: text/plain${CRLF}${CRLF}",
+ "/colon-header" => "HTTP/1.1 200 OK${CRLF}Content-Type: text/plain${CRLF}Content-Length: 6${CRLF}Bad-Header: :foo${CRLF}${CRLF}Hello\n",
+ },
+ );
+
+ sub http_connect {
+ my($self, $cnf) = @_;
+ my $server = $servers{$cnf->{PeerAddr}} || return undef;
+ ${*$self}{server} = $server;
+ ${*$self}{read_chunk_size} = $cnf->{ReadChunkSize};
+ return $self;
+ }
+
+ sub print {
+ my $self = shift;
+ #Data::Dump::dump("PRINT", @_);
+ my $in = shift;
+ my($method, $uri) = split(' ', $in);
+
+ my $out;
+ if ($method eq "TRACE") {
+ my $len = length($in);
+ $out = "HTTP/1.0 200 OK${CRLF}Content-Length: $len${CRLF}" .
+ "Content-Type: message/http${CRLF}${CRLF}" .
+ $in;
+ }
+ else {
+ $out = ${*$self}{server}{$uri};
+ $out = "HTTP/1.0 404 Not found${CRLF}${CRLF}" unless defined $out;
+ }
+
+ ${*$self}{out} .= $out;
+ return 1;
+ }
+
+ sub sysread {
+ my $self = shift;
+ #Data::Dump::dump("SYSREAD", @_);
+ my $length = $_[1];
+ my $offset = $_[2] || 0;
+
+ if (my $read_chunk_size = ${*$self}{read_chunk_size}) {
+ $length = $read_chunk_size if $read_chunk_size < $length;
+ }
+
+ my $data = substr(${*$self}{out}, 0, $length, "");
+ return 0 unless length($data);
+
+ $_[0] = "" unless defined $_[0];
+ substr($_[0], $offset) = $data;
+ return length($data);
+ }
+
+ # ----------------
+
+ sub request {
+ my($self, $method, $uri, $headers, $opt) = @_;
+ $headers ||= [];
+ $opt ||= {};
+
+ my($code, $message, @h);
+ my $buf = "";
+ eval {
+ $self->write_request($method, $uri, @$headers) || die "Can't write request";
+ ($code, $message, @h) = $self->read_response_headers(%$opt);
+
+ my $tmp;
+ my $n;
+ while ($n = $self->read_entity_body($tmp, 32)) {
+ #Data::Dump::dump($tmp, $n);
+ $buf .= $tmp;
+ }
+
+ push(@h, $self->get_trailers);
+
+ };
+
+ my %res = ( code => $code,
+ message => $message,
+ headers => \@h,
+ content => $buf,
+ );
+
+ if ($@) {
+ $res{error} = $@;
+ }
+
+ return \%res;
+ }
+}
+
+# Start testing
+my $h;
+my $res;
+
+$h = HTTP->new(Host => "a", KeepAlive => 1) || die;
+$res = $h->request(GET => "/");
+
+#Data::Dump::dump($res);
+
+ok($res->{code}, 200);
+ok($res->{content}, "Hello\n");
+
+$res = $h->request(GET => "/404");
+ok($res->{code}, 404);
+
+$res = $h->request(TRACE => "/foo");
+ok($res->{code}, 200);
+ok($res->{content}, "TRACE /foo HTTP/1.1${CRLF}Keep-Alive: 300${CRLF}Connection: Keep-Alive${CRLF}Host: a${CRLF}${CRLF}");
+
+# try to turn off keep alive
+$h->keep_alive(0);
+$res = $h->request(TRACE => "/foo");
+ok($res->{code}, "200");
+ok($res->{content}, "TRACE /foo HTTP/1.1${CRLF}Connection: close${CRLF}Host: a${CRLF}${CRLF}");
+
+# try a bad one
+$res = $h->request(GET => "/bad1", [], {laxed => 1});
+ok($res->{code}, "200");
+ok($res->{message}, "OK");
+ok("@{$res->{headers}}", "Server foo Content-type text/foo");
+ok($res->{content}, "abc\n");
+
+$res = $h->request(GET => "/bad1");
+ok($res->{error} =~ /Bad header/);
+ok(!$res->{code});
+$h = undef; # it is in a bad state now
+
+$h = HTTP->new("a") || die; # reconnect
+$res = $h->request(GET => "/09", [], {laxed => 1});
+ok($res->{code}, "200");
+ok($res->{message}, "Assumed OK");
+ok($res->{content}, "Hello${CRLF}World!${CRLF}");
+ok($h->peer_http_version, "0.9");
+
+$res = $h->request(GET => "/09");
+ok($res->{error} =~ /^Bad response status line: 'Hello'/);
+$h = undef; # it's in a bad state again
+
+$h = HTTP->new(Host => "a", KeepAlive => 1, ReadChunkSize => 1) || die; # reconnect
+$res = $h->request(GET => "/chunked");
+ok($res->{code}, 200);
+ok($res->{content}, "Hello");
+ok("@{$res->{headers}}", "Transfer-Encoding chunked Content-MD5 xxx");
+
+# once more
+$res = $h->request(GET => "/chunked");
+ok($res->{code}, "200");
+ok($res->{content}, "Hello");
+ok("@{$res->{headers}}", "Transfer-Encoding chunked Content-MD5 xxx");
+
+# test head
+$res = $h->request(HEAD => "/head");
+ok($res->{code}, "200");
+ok($res->{content}, "");
+ok("@{$res->{headers}}", "Content-Length 16 Content-Type text/plain");
+
+$res = $h->request(GET => "/");
+ok($res->{code}, "200");
+ok($res->{content}, "Hello\n");
+
+$h = HTTP->new(Host => undef, PeerAddr => "a", );
+$h->http_version("1.0");
+ok(!defined $h->host);
+$res = $h->request(TRACE => "/");
+ok($res->{code}, "200");
+ok($res->{content}, "TRACE / HTTP/1.0\r\n\r\n");
+
+# check that headers with colons at the start of values don't break
+$res = $h->request(GET => '/colon-header');
+ok("@{$res->{headers}}", "Content-Type text/plain Content-Length 6 Bad-Header :foo");
+
+require Net::HTTP;
+eval {
+ $h = Net::HTTP->new;
+};
+print "# $@";
+ok($@);
+
--- /dev/null
+#!perl -w
+
+use Test;
+plan tests => 10;
+
+use File::Listing;
+
+$dir = <<'EOL';
+total 68
+drwxr-xr-x 4 aas users 1024 Mar 16 15:47 .
+drwxr-xr-x 11 aas users 1024 Mar 15 19:22 ..
+drwxr-xr-x 2 aas users 1024 Mar 16 15:47 CVS
+-rw-r--r-- 1 aas users 2384 Feb 26 21:14 Debug.pm
+-rw-r--r-- 1 aas users 2145 Feb 26 20:09 IO.pm
+-rw-r--r-- 1 aas users 3960 Mar 15 18:05 MediaTypes.pm
+-rw-r--r-- 1 aas users 792 Feb 26 20:12 MemberMixin.pm
+drwxr-xr-x 3 aas users 1024 Mar 15 18:05 Protocol
+-rw-r--r-- 1 aas users 5613 Feb 26 20:16 Protocol.pm
+-rw-r--r-- 1 aas users 5963 Feb 26 21:27 RobotUA.pm
+-rw-r--r-- 1 aas users 5071 Mar 16 12:25 Simple.pm
+-rw-r--r-- 1 aas users 8817 Mar 15 18:05 Socket.pm
+-rw-r--r-- 1 aas users 2121 Feb 5 14:22 TkIO.pm
+-rw-r--r-- 1 aas users 19628 Mar 15 18:05 UserAgent.pm
+-rw-r--r-- 1 aas users 2841 Feb 5 19:06 media.types
+
+CVS:
+total 5
+drwxr-xr-x 2 aas users 1024 Mar 16 15:47 .
+drwxr-xr-x 4 aas users 1024 Mar 16 15:47 ..
+-rw-r--r-- 1 aas users 545 Mar 16 15:47 Entries
+-rw-r--r-- 1 aas users 39 Mar 10 09:05 Repository
+-rw-r--r-- 1 aas users 19 Mar 10 09:05 Root
+
+Protocol:
+total 37
+drwxr-xr-x 3 aas users 1024 Mar 15 18:05 .
+drwxr-xr-x 4 aas users 1024 Mar 16 15:47 ..
+drwxr-xr-x 2 aas users 1024 Mar 15 18:05 CVS
+-rw-r--r-- 1 aas users 4646 Feb 26 20:13 file.pm
+-rw-r--r-- 1 aas users 13006 Mar 15 18:05 ftp.pm
+-rw-r--r-- 1 aas users 5935 Mar 6 10:29 gopher.pm
+-rw-r--r-- 1 aas users 5453 Mar 6 10:29 http.pm
+-rw-r--r-- 1 aas users 2365 Feb 26 20:13 mailto.pm
+
+Protocol/CVS:
+total 5
+drwxr-xr-x 2 aas users 1024 Mar 15 18:05 .
+drwxr-xr-x 3 aas users 1024 Mar 15 18:05 ..
+-rw-r--r-- 1 aas users 238 Mar 15 18:05 Entries
+-rw-r--r-- 1 aas users 48 Mar 10 09:05 Repository
+-rw-r--r-- 1 aas users 19 Mar 10 09:05 Root
+EOL
+
+@dir = parse_dir($dir, undef, 'unix');
+
+ok(@dir, 25);
+
+for (@dir) {
+ ($name, $type, $size, $mtime, $mode) = @$_;
+ $size ||= 0; # ensure that it is defined
+ printf "# %-25s $type %6d ", $name, $size;
+ print scalar(localtime($mtime));
+ printf " %06o", $mode;
+ print "\n";
+}
+
+# Pick out the Socket.pm line as the sample we check carefully
+($name, $type, $size, $mtime, $mode) = @{$dir[9]};
+
+ok($name, "Socket.pm");
+ok($type, "f");
+ok($size, 8817);
+
+# Must be careful when checking the time stamps because we don't know
+# which year if this script lives for a long time.
+$timestring = scalar(localtime($mtime));
+ok($timestring =~ /Mar\s+15\s+18:05/);
+
+ok($mode, 0100644);
+
+@dir = parse_dir(<<'EOT');
+drwxr-xr-x 21 root root 704 2007-03-22 21:48 dir
+EOT
+
+ok(@dir, 1);
+ok($dir[0][0], "dir");
+ok($dir[0][1], "d");
+
+$timestring = scalar(localtime($dir[0][3]));
+print "# $timestring\n";
+ok($timestring =~ /^Thu Mar 22 21:48/);
--- /dev/null
+#!perl -w
+
+use Test;
+
+use LWP::MediaTypes;
+
+require URI::URL;
+
+$url1 = new URI::URL 'http://www/foo/test.gif?search+x#frag';
+$url2 = new URI::URL 'http:test';
+
+my $pwd if $^O eq "MacOS";
+
+unless ($^O eq "MacOS") {
+ $file = "/etc/passwd";
+ -r $file or $file = "./README";
+}
+else {
+ require Mac::Files;
+ $pwd = `pwd`;
+ chomp($pwd);
+ my $dir = Mac::Files::FindFolder(Mac::Files::kOnSystemDisk(),
+ Mac::Files::kDesktopFolderType());
+ chdir($dir);
+ $file = "README";
+ open(README,">$file") or die "Unable to open $file";
+ print README "This is a dummy file for LWP testing purposes\n";
+ close README;
+ open(README,">/dev/null") or die "Unable to open /dev/null";
+ print README "This is a dummy file for LWP testing purposes\n";
+ close README;
+}
+
+@tests =
+(
+ ["/this.dir/file.html" => "text/html",],
+ ["test.gif.htm" => "text/html",],
+ ["test.txt.gz" => "text/plain", "gzip"],
+ ["gif.foo" => "application/octet-stream",],
+ ["lwp-0.03.tar.Z" => "application/x-tar", "compress"],
+ [$file => "text/plain",],
+ ["/random/file" => "application/octet-stream",],
+ [($^O eq 'VMS'? "nl:" : "/dev/null") => "text/plain",],
+ [$url1 => "image/gif",],
+ [$url2 => "application/octet-stream",],
+ ["x.ppm.Z.UU" => "image/x-portable-pixmap","compress","x-uuencode",],
+);
+
+plan tests => @tests * 3 + 6;
+
+if ($ENV{HOME} and -f "$ENV{HOME}/.mime.types") {
+ warn "
+The MediaTypes test might fail because you have a private ~/.mime.types file
+If you get a failed test, try to move it away while testing.
+";
+}
+
+
+for (@tests) {
+ ($file, $expectedtype, @expectedEnc) = @$_;
+ $type1 = guess_media_type($file);
+ ($type, @enc) = guess_media_type($file);
+ ok($type1, $type);
+ ok($type, $expectedtype);
+ ok("@enc", "@expectedEnc");
+}
+
+@imgSuffix = media_suffix('image/*');
+print "# Image suffixes: @imgSuffix\n";
+ok(grep $_ eq "gif", @imgSuffix);
+
+@audioSuffix = media_suffix('AUDIO/*');
+print "# Audio suffixes: @audioSuffix\n";
+ok(grep $_ eq 'oga', @audioSuffix);
+ok(media_suffix('audio/OGG'), 'oga');
+
+require HTTP::Response;
+$r = new HTTP::Response 200, "Document follows";
+$r->title("file.tar.gz.uu");
+guess_media_type($r->title, $r);
+#print $r->as_string;
+
+ok($r->content_type, "application/x-tar");
+
+@enc = $r->header("Content-Encoding");
+ok("@enc", "gzip x-uuencode");
+
+#
+use LWP::MediaTypes qw(add_type add_encoding);
+add_type("x-world/x-vrml", qw(wrl vrml));
+add_encoding("x-gzip" => "gz");
+add_encoding(rot13 => "r13");
+
+@x = guess_media_type("foo.vrml.r13.gz");
+#print "@x\n";
+ok("@x", "x-world/x-vrml rot13 x-gzip");
+
+#print LWP::MediaTypes::_dump();
+
+if($^O eq "MacOS") {
+ unlink "README";
+ unlink "/dev/null";
+ chdir($pwd);
+}
+
--- /dev/null
+#!perl -w
+
+use strict;
+
+BEGIN {
+ eval {
+ require Encode;
+ Encode::find_encoding("UTF-16-BE") || die "Need a version of Encode that supports UTF-16-BE";
+ };
+ if ($@) {
+ print "1..0 # Skipped: Encode not available\n";
+ print $@;
+ exit;
+ }
+}
+
+use Test;
+plan tests => 36;
+
+use HTTP::Response;
+my $r = HTTP::Response->new(200, "OK");
+ok($r->content_charset, undef);
+ok($r->content_type_charset, undef);
+
+$r->content_type("text/plain");
+ok($r->content_charset, undef);
+
+$r->content("abc");
+ok($r->content_charset, "US-ASCII");
+
+$r->content("f\xE5rep\xF8lse\n");
+ok($r->content_charset, "ISO-8859-1");
+
+$r->content("f\xC3\xA5rep\xC3\xB8lse\n");
+ok($r->content_charset, "UTF-8");
+
+$r->content_type("text/html");
+$r->content(<<'EOT');
+<meta charset="UTF-8">
+EOT
+ok($r->content_charset, "UTF-8");
+
+$r->content(<<'EOT');
+<body>
+<META CharSet="Utf-16-LE">
+<meta charset="ISO-8859-1">
+EOT
+ok($r->content_charset, "UTF-8");
+
+$r->content(<<'EOT');
+<!-- <meta charset="UTF-8">
+EOT
+ok($r->content_charset, "US-ASCII");
+
+$r->content(<<'EOT');
+<meta content="text/plain; charset=UTF-8">
+EOT
+ok($r->content_charset, "UTF-8");
+
+$r->content_type('text/plain; charset="iso-8859-1"');
+ok($r->content_charset, "ISO-8859-1");
+ok($r->content_type_charset, "ISO-8859-1");
+
+$r->content_type("application/xml");
+$r->content("<foo>..</foo>");
+ok($r->content_charset, "UTF-8");
+
+require Encode;
+for my $enc ("UTF-16-BE", "UTF-16-LE", "UTF-32-BE", "UTF-32-LE") {
+ $r->content(Encode::encode($enc, "<foo>..</foo>"));
+ ok($r->content_charset, $enc);
+}
+
+$r->content(<<'EOT');
+<?xml version="1.0" encoding="utf8" ?>
+EOT
+ok($r->content_charset, "utf8");
+
+$r->content(<<'EOT');
+<?xml version="1.0" encoding=" "?>
+EOT
+ok($r->content_charset, "UTF-8");
+
+$r->content(<<'EOT');
+<?xml version="1.0" encoding=" ISO-8859-1 "?>
+EOT
+ok($r->content_charset, "ISO-8859-1");
+
+$r->content(<<'EOT');
+<?xml version="1.0"
+encoding="US-ASCII" ?>
+EOT
+ok($r->content_charset, "US-ASCII");
+
+{
+ sub TIESCALAR{bless[]}
+ tie $_, "";
+ my $fail = 0;
+ sub STORE{ ++$fail }
+ sub FETCH{}
+ $r->content_charset;
+ ok($fail, 0, 'content_charset leaves $_ alone');
+}
+
+$r->remove_content_headers;
+$r->content_type("text/plain; charset=UTF-8");
+$r->content("abc");
+ok($r->decoded_content, "abc");
+
+$r->content("\xc3\xa5");
+ok($r->decoded_content, chr(0xE5));
+ok($r->decoded_content(charset => "none"), "\xC3\xA5");
+ok($r->decoded_content(alt_charset => "UTF-8"), chr(0xE5));
+ok($r->decoded_content(alt_charset => "none"), chr(0xE5));
+
+$r->content_type("text/plain; charset=UTF");
+ok($r->decoded_content, undef);
+ok($r->decoded_content(charset => "UTF-8"), chr(0xE5));
+ok($r->decoded_content(charset => "none"), "\xC3\xA5");
+ok($r->decoded_content(alt_charset => "UTF-8"), chr(0xE5));
+ok($r->decoded_content(alt_charset => "none"), "\xC3\xA5");
+
+$r->content_type("text/plain");
+ok($r->decoded_content, chr(0xE5));
+ok($r->decoded_content(charset => "none"), "\xC3\xA5");
+ok($r->decoded_content(default_charset => "ISO-8859-1"), "\xC3\xA5");
+ok($r->decoded_content(default_charset => "latin1"), "\xC3\xA5");
--- /dev/null
+#!perl -w
+
+# This is the old message.t test. It is not maintained any more,
+# but kept around in case it happens to catch any mistakes. Please
+# add new tests to message.t instead.
+
+use strict;
+use Test qw(plan ok);
+
+plan tests => 20;
+
+require HTTP::Request;
+require HTTP::Response;
+
+require Time::Local if $^O eq "MacOS";
+my $offset = ($^O eq "MacOS") ? Time::Local::timegm(0,0,0,1,0,70) : 0;
+
+my $req = HTTP::Request->new(GET => "http://www.sn.no/");
+$req->header(
+ "if-modified-since" => "Thu, 03 Feb 1994 00:00:00 GMT",
+ "mime-version" => "1.0");
+
+ok($req->as_string =~ /^GET/m);
+ok($req->header("MIME-Version"), "1.0");
+ok($req->if_modified_since, ((760233600 + $offset) || 0));
+
+$req->content("gisle");
+$req->add_content(" aas");
+$req->add_content(\ " old interface is depreciated");
+${$req->content_ref} =~ s/\s+is\s+depreciated//;
+
+ok($req->content, "gisle aas old interface");
+
+my $time = time;
+$req->date($time);
+my $timestr = gmtime($time);
+my($month) = ($timestr =~ /^\S+\s+(\S+)/); # extract month;
+#print "These should represent the same time:\n\t", $req->header('Date'), "\n\t$timestr\n";
+ok($req->header('Date') =~ /\Q$month/);
+
+$req->authorization_basic("gisle", "passwd");
+ok($req->header("Authorization"), "Basic Z2lzbGU6cGFzc3dk");
+
+my($user, $pass) = $req->authorization_basic;
+ok($user, "gisle");
+ok($pass, "passwd");
+
+# Check the response
+my $res = HTTP::Response->new(200, "This message");
+ok($res->is_success);
+
+my $html = $res->error_as_HTML;
+ok($html =~ /<head>/i && $html =~ /This message/);
+
+$res->content_type("text/html;version=3.0");
+$res->content("<html>...</html>\n");
+
+my $res2 = $res->clone;
+ok($res2->code, 200);
+ok($res2->header("cOntent-TYPE"), "text/html;version=3.0");
+ok($res2->content =~ />\.\.\.</);
+
+# Check the base method:
+$res = HTTP::Response->new(200, "This message");
+ok($res->base, undef);
+$res->request($req);
+$res->content_type("image/gif");
+
+ok($res->base, "http://www.sn.no/");
+$res->header('Base', 'http://www.sn.no/xxx/');
+ok($res->base, "http://www.sn.no/xxx/");
+
+# Check the AUTLOAD delegate method with regular expressions
+"This string contains text/html" =~ /(\w+\/\w+)/;
+$res->content_type($1);
+ok($res->content_type, "text/html");
+
+# Check what happens when passed a new URI object
+require URI;
+$req = HTTP::Request->new(GET => URI->new("http://localhost"));
+ok($req->uri, "http://localhost");
+
+$req = HTTP::Request->new(GET => "http://www.example.com",
+ [ Foo => 1, bar => 2 ], "FooBar\n");
+ok($req->as_string, <<EOT);
+GET http://www.example.com
+Bar: 2
+Foo: 1
+
+FooBar
+EOT
+
+$req->clear;
+ok($req->as_string, <<EOT);
+GET http://www.example.com
+
+EOT
--- /dev/null
+#!/usr/bin/perl -w
+
+use strict;
+use Test qw(plan ok);
+plan tests => 39;
+
+use HTTP::Message;
+use HTTP::Request::Common qw(POST);
+
+my $m = HTTP::Message->new;
+
+ok(ref($m->headers), "HTTP::Headers");
+ok($m->headers_as_string, "");
+ok($m->content, "");
+ok(j($m->parts), "");
+ok($m->as_string, "\n");
+
+my $m_clone = $m->clone;
+$m->push_header("Foo", 1);
+$m->add_content("foo");
+
+ok($m_clone->as_string, "\n");
+ok($m->headers_as_string, "Foo: 1\n");
+ok($m->header("Foo"), 1);
+ok($m->as_string, "Foo: 1\n\nfoo\n");
+ok($m->as_string("\r\n"), "Foo: 1\r\n\r\nfoo");
+ok(j($m->parts), "");
+
+$m->content_type("message/foo");
+$m->content(<<EOT);
+H1: 1
+H2: 2
+ 3
+H3: abc
+
+FooBar
+EOT
+
+my @parts = $m->parts;
+ok(@parts, 1);
+my $m2 = $parts[0];
+ok(ref($m2), "HTTP::Message");
+
+ok($m2->header("h1"), 1);
+ok($m2->header("h2"), "2\n 3");
+ok($m2->header("h3"), " abc");
+ok($m2->content, "FooBar\n");
+ok($m2->as_string, $m->content);
+ok(j($m2->parts), "");
+
+$m = POST("http://www.example.com",
+ Content_Type => 'form-data',
+ Content => [ foo => 1, bar => 2 ]);
+ok($m->content_type, "multipart/form-data");
+@parts = $m->parts;
+ok(@parts, 2);
+ok($parts[0]->header("Content-Disposition"), 'form-data; name="foo"');
+ok($parts[0]->content, 1);
+ok($parts[1]->header("Content-Disposition"), 'form-data; name="bar"');
+ok($parts[1]->content, 2);
+
+$m = HTTP::Message->new;
+$m->content_type("message/http");
+$m->content(<<EOT);
+GET / HTTP/1.0
+Host: example.com
+
+How is this?
+EOT
+
+@parts = $m->parts;
+ok(@parts, 1);
+ok($parts[0]->method, "GET");
+ok($parts[0]->uri, "/");
+ok($parts[0]->protocol, "HTTP/1.0");
+ok($parts[0]->header("Host"), "example.com");
+ok($parts[0]->content, "How is this?\n");
+
+$m = HTTP::Message->new;
+$m->content_type("message/http");
+$m->content(<<EOT);
+HTTP/1.1 200 OK
+Content-Type : text/html
+
+<H1>Hello world!</H1>
+EOT
+
+@parts = $m->parts;
+ok(@parts, 1);
+ok($parts[0]->code, 200);
+ok($parts[0]->message, "OK");
+ok($parts[0]->protocol, "HTTP/1.1");
+ok($parts[0]->content_type, "text/html");
+ok($parts[0]->content, "<H1>Hello world!</H1>\n");
+
+$m->parts(HTTP::Request->new("GET", "http://www.example.com"));
+ok($m->as_string, "Content-Type: message/http\n\nGET http://www.example.com\r\n\r\n");
+
+$m = HTTP::Request->new("PUT", "http://www.example.com");
+$m->parts(HTTP::Message->new([Foo => 1], "abc\n"));
+ok($m->as_string, <<EOT);
+PUT http://www.example.com
+Content-Type: multipart/mixed; boundary=xYzZY
+
+--xYzZY\r
+Foo: 1\r
+\r
+abc
+\r
+--xYzZY--\r
+EOT
+
+sub j { join(":", @_) }
--- /dev/null
+#!perl -w
+
+use strict;
+use Test qw(plan ok skip);
+
+plan tests => 125;
+
+require HTTP::Message;
+use Config qw(%Config);
+
+my($m, $m2, @parts);
+
+$m = HTTP::Message->new;
+ok($m);
+ok(ref($m), "HTTP::Message");
+ok(ref($m->headers), "HTTP::Headers");
+ok($m->as_string, "\n");
+ok($m->headers->as_string, "");
+ok($m->headers_as_string, "");
+ok($m->content, "");
+
+$m->header("Foo", 1);
+ok($m->as_string, "Foo: 1\n\n");
+
+$m2 = HTTP::Message->new($m->headers);
+$m2->header(bar => 2);
+ok($m->as_string, "Foo: 1\n\n");
+ok($m2->as_string, "Bar: 2\nFoo: 1\n\n");
+ok($m2->dump, "Bar: 2\nFoo: 1\n\n(no content)\n");
+
+$m2 = HTTP::Message->new($m->headers, "foo");
+ok($m2->as_string, "Foo: 1\n\nfoo\n");
+ok($m2->as_string("<<\n"), "Foo: 1<<\n<<\nfoo");
+$m2 = HTTP::Message->new($m->headers, "foo\n");
+ok($m2->as_string, "Foo: 1\n\nfoo\n");
+
+$m = HTTP::Message->new([a => 1, b => 2], "abc");
+ok($m->as_string, "A: 1\nB: 2\n\nabc\n");
+
+$m = HTTP::Message->parse("");
+ok($m->as_string, "\n");
+$m = HTTP::Message->parse("\n");
+ok($m->as_string, "\n");
+$m = HTTP::Message->parse("\n\n");
+ok($m->as_string, "\n\n");
+ok($m->content, "\n");
+
+$m = HTTP::Message->parse("foo");
+ok($m->as_string, "\nfoo\n");
+$m = HTTP::Message->parse("foo: 1");
+ok($m->as_string, "Foo: 1\n\n");
+$m = HTTP::Message->parse("foo_bar: 1");
+ok($m->as_string, "Foo_bar: 1\n\n");
+$m = HTTP::Message->parse("foo: 1\n\nfoo");
+ok($m->as_string, "Foo: 1\n\nfoo\n");
+$m = HTTP::Message->parse(<<EOT);
+FOO : 1
+ 2
+ 3
+ 4
+bar:
+ 1
+Baz: 1
+
+foobarbaz
+EOT
+ok($m->as_string, <<EOT);
+Bar:
+ 1
+Baz: 1
+Foo: 1
+ 2
+ 3
+ 4
+
+foobarbaz
+EOT
+
+$m = HTTP::Message->parse(<<EOT);
+Date: Fri, 18 Feb 2005 18:33:46 GMT
+Connection: close
+Content-Type: text/plain
+
+foo:bar
+second line
+EOT
+ok($m->content(""), <<EOT);
+foo:bar
+second line
+EOT
+ok($m->as_string, <<EOT);
+Connection: close
+Date: Fri, 18 Feb 2005 18:33:46 GMT
+Content-Type: text/plain
+
+EOT
+
+$m = HTTP::Message->parse(" abc\nfoo: 1\n");
+ok($m->as_string, "\n abc\nfoo: 1\n");
+$m = HTTP::Message->parse(" foo : 1\n");
+ok($m->as_string, "\n foo : 1\n");
+$m = HTTP::Message->parse("\nfoo: bar\n");
+ok($m->as_string, "\nfoo: bar\n");
+
+$m = HTTP::Message->new([a => 1, b => 2], "abc");
+ok($m->content("foo\n"), "abc");
+ok($m->content, "foo\n");
+
+$m->add_content("bar");
+ok($m->content, "foo\nbar");
+$m->add_content(\"\n");
+ok($m->content, "foo\nbar\n");
+
+ok(ref($m->content_ref), "SCALAR");
+ok(${$m->content_ref}, "foo\nbar\n");
+${$m->content_ref} =~ s/[ao]/i/g;
+ok($m->content, "fii\nbir\n");
+
+$m->clear;
+ok($m->headers->header_field_names, 0);
+ok($m->content, "");
+
+ok($m->parts, undef);
+$m->parts(HTTP::Message->new,
+ HTTP::Message->new([a => 1], "foo"),
+ HTTP::Message->new(undef, "bar\n"),
+ );
+ok($m->parts->as_string, "\n");
+
+my $str = $m->as_string;
+$str =~ s/\r/<CR>/g;
+ok($str, <<EOT);
+Content-Type: multipart/mixed; boundary=xYzZY
+
+--xYzZY<CR>
+<CR>
+<CR>
+--xYzZY<CR>
+A: 1<CR>
+<CR>
+foo<CR>
+--xYzZY<CR>
+<CR>
+bar
+<CR>
+--xYzZY--<CR>
+EOT
+
+$m2 = HTTP::Message->new;
+$m2->parts($m);
+
+$str = $m2->as_string;
+$str =~ s/\r/<CR>/g;
+ok($str =~ /boundary=(\S+)/);
+
+
+ok($str, <<EOT);
+Content-Type: multipart/mixed; boundary=$1
+
+--$1<CR>
+Content-Type: multipart/mixed; boundary=xYzZY<CR>
+<CR>
+--xYzZY<CR>
+<CR>
+<CR>
+--xYzZY<CR>
+A: 1<CR>
+<CR>
+foo<CR>
+--xYzZY<CR>
+<CR>
+bar
+<CR>
+--xYzZY--<CR>
+<CR>
+--$1--<CR>
+EOT
+
+@parts = $m2->parts;
+ok(@parts, 1);
+
+@parts = $parts[0]->parts;
+ok(@parts, 3);
+ok($parts[1]->header("A"), 1);
+
+$m2->parts([HTTP::Message->new]);
+@parts = $m2->parts;
+ok(@parts, 1);
+
+$m2->parts([]);
+@parts = $m2->parts;
+ok(@parts, 0);
+
+$m->clear;
+$m2->clear;
+
+$m = HTTP::Message->new([content_type => "message/http; boundary=aaa",
+ ],
+ <<EOT);
+GET / HTTP/1.1
+Host: www.example.com:8008
+
+EOT
+
+@parts = $m->parts;
+ok(@parts, 1);
+$m2 = $parts[0];
+ok(ref($m2), "HTTP::Request");
+ok($m2->method, "GET");
+ok($m2->uri, "/");
+ok($m2->protocol, "HTTP/1.1");
+ok($m2->header("Host"), "www.example.com:8008");
+ok($m2->content, "");
+
+$m->content(<<EOT);
+HTTP/1.0 200 OK
+Content-Type: text/plain
+
+Hello
+EOT
+
+$m2 = $m->parts;
+ok(ref($m2), "HTTP::Response");
+ok($m2->protocol, "HTTP/1.0");
+ok($m2->code, "200");
+ok($m2->message, "OK");
+ok($m2->content_type, "text/plain");
+ok($m2->content, "Hello\n");
+
+eval { $m->parts(HTTP::Message->new, HTTP::Message->new) };
+ok($@);
+
+$m->add_part(HTTP::Message->new([a=>[1..3]], "a"));
+$str = $m->as_string;
+$str =~ s/\r/<CR>/g;
+ok($str, <<EOT);
+Content-Type: multipart/mixed; boundary=xYzZY
+
+--xYzZY<CR>
+Content-Type: message/http; boundary=aaa<CR>
+<CR>
+HTTP/1.0 200 OK
+Content-Type: text/plain
+
+Hello
+<CR>
+--xYzZY<CR>
+A: 1<CR>
+A: 2<CR>
+A: 3<CR>
+<CR>
+a<CR>
+--xYzZY--<CR>
+EOT
+
+$m->add_part(HTTP::Message->new([b=>[1..3]], "b"));
+
+$str = $m->as_string;
+$str =~ s/\r/<CR>/g;
+ok($str, <<EOT);
+Content-Type: multipart/mixed; boundary=xYzZY
+
+--xYzZY<CR>
+Content-Type: message/http; boundary=aaa<CR>
+<CR>
+HTTP/1.0 200 OK
+Content-Type: text/plain
+
+Hello
+<CR>
+--xYzZY<CR>
+A: 1<CR>
+A: 2<CR>
+A: 3<CR>
+<CR>
+a<CR>
+--xYzZY<CR>
+B: 1<CR>
+B: 2<CR>
+B: 3<CR>
+<CR>
+b<CR>
+--xYzZY--<CR>
+EOT
+
+$m = HTTP::Message->new;
+$m->add_part(HTTP::Message->new([a=>[1..3]], "a"));
+ok($m->header("Content-Type"), "multipart/mixed; boundary=xYzZY");
+$str = $m->as_string;
+$str =~ s/\r/<CR>/g;
+ok($str, <<EOT);
+Content-Type: multipart/mixed; boundary=xYzZY
+
+--xYzZY<CR>
+A: 1<CR>
+A: 2<CR>
+A: 3<CR>
+<CR>
+a<CR>
+--xYzZY--<CR>
+EOT
+
+$m = HTTP::Message->new;
+$m->content_ref(\my $foo);
+ok($m->content_ref, \$foo);
+$foo = "foo";
+ok($m->content, "foo");
+$m->add_content("bar");
+ok($foo, "foobar");
+ok($m->as_string, "\nfoobar\n");
+$m->content_type("message/foo");
+$m->parts(HTTP::Message->new(["h", "v"], "C"));
+ok($foo, "H: v\r\n\r\nC");
+$foo =~ s/C/c/;
+$m2 = $m->parts;
+ok($m2->content, "c");
+
+$m = HTTP::Message->new;
+$foo = [];
+$m->content($foo);
+ok($m->content, $foo);
+ok(${$m->content_ref}, $foo);
+ok(${$m->content_ref([])}, $foo);
+ok($m->content_ref != $foo);
+eval {$m->add_content("x")};
+ok($@ && $@ =~ /^Can't append to ARRAY content/);
+
+$foo = sub { "foo" };
+$m->content($foo);
+ok($m->content, $foo);
+ok(${$m->content_ref}, $foo);
+
+$m->content_ref($foo);
+ok($m->content, $foo);
+ok($m->content_ref, $foo);
+
+eval {$m->content_ref("foo")};
+ok($@ && $@ =~ /^Setting content_ref to a non-ref/);
+
+$m->content_ref(\"foo");
+eval {$m->content("bar")};
+ok($@ && $@ =~ /^Modification of a read-only value/);
+
+$foo = "foo";
+$m->content_ref(\$foo);
+ok($m->content("bar"), "foo");
+ok($foo, "bar");
+ok($m->content, "bar");
+ok($m->content_ref, \$foo);
+
+$m = HTTP::Message->new;
+$m->content("fo=6F");
+ok($m->decoded_content, "fo=6F");
+$m->header("Content-Encoding", "quoted-printable");
+ok($m->decoded_content, "foo");
+
+$m = HTTP::Message->new;
+$m->header("Content-Encoding", "gzip, base64");
+$m->content_type("text/plain; charset=UTF-8");
+$m->content("H4sICFWAq0ECA3h4eAB7v3u/R6ZCSUZqUarCoxm7uAAZKHXiEAAAAA==\n");
+
+my $NO_ENCODE = $] < 5.008 || ($Config{'extensions'} !~ /\bEncode\b/)
+ ? "No Encode module" : "";
+$@ = "";
+skip($NO_ENCODE, sub { eval { $m->decoded_content } }, "\x{FEFF}Hi there \x{263A}\n");
+ok($@ || "", "");
+ok($m->content, "H4sICFWAq0ECA3h4eAB7v3u/R6ZCSUZqUarCoxm7uAAZKHXiEAAAAA==\n");
+
+$m2 = $m->clone;
+ok($m2->decode);
+ok($m2->header("Content-Encoding"), undef);
+ok($m2->content, qr/Hi there/);
+
+ok(grep { $_ eq "gzip" } $m->decodable);
+
+my $tmp = MIME::Base64::decode($m->content);
+$m->content($tmp);
+$m->header("Content-Encoding", "gzip");
+$@ = "";
+skip($NO_ENCODE, sub { eval { $m->decoded_content } }, "\x{FEFF}Hi there \x{263A}\n");
+ok($@ || "", "");
+ok($m->content, $tmp);
+
+$m->remove_header("Content-Encoding");
+$m->content("a\xFF");
+
+my $BAD_ENCODE = $NO_ENCODE || !(eval { require Encode; defined(Encode::decode("UTF-8", "\xff")) });
+
+skip($BAD_ENCODE, sub { $m->decoded_content }, "a\x{FFFD}");
+skip($BAD_ENCODE, sub { $m->decoded_content(charset_strict => 1) }, undef);
+
+$m->header("Content-Encoding", "foobar");
+ok($m->decoded_content, undef);
+ok($@ =~ /^Don't know how to decode Content-Encoding 'foobar'/);
+
+my $err = 0;
+eval {
+ $m->decoded_content(raise_error => 1);
+ $err++;
+};
+ok($@ =~ /Don't know how to decode Content-Encoding 'foobar'/);
+ok($err, 0);
+
+if ($] >= 5.008001) {
+ eval {
+ HTTP::Message->new([], "\x{263A}");
+ };
+ ok($@ =~ /bytes/);
+ $m = HTTP::Message->new;
+ eval {
+ $m->add_content("\x{263A}");
+ };
+ ok($@ =~ /bytes/);
+ eval {
+ $m->content("\x{263A}");
+ };
+ ok($@ =~ /bytes/);
+}
+else {
+ skip("Missing is_utf8 test", undef) for 1..3;
+}
+
+# test the add_content_utf8 method
+if ($] >= 5.008001) {
+ $m = HTTP::Message->new(["Content-Type", "text/plain; charset=UTF-8"]);
+ $m->add_content_utf8("\x{263A}");
+ $m->add_content_utf8("-\xC5");
+ ok($m->content, "\xE2\x98\xBA-\xC3\x85");
+ ok($m->decoded_content, "\x{263A}-\x{00C5}");
+}
+else {
+ skip("Missing is_utf8 test", undef) for 1..2;
+}
+
+$m = HTTP::Message->new([
+ "Content-Type", "text/plain",
+ ],
+ "Hello world!"
+);
+$m->content_length(length $m->content);
+$m->encode("deflate");
+$m->dump(prefix => "# ");
+ok($m->dump(prefix => "| "), <<'EOT');
+| Content-Encoding: deflate
+| Content-Type: text/plain
+|
+| x\x9C\xF3H\xCD\xC9\xC9W(\xCF/\xCAIQ\4\0\35\t\4^
+EOT
+$m->encode("base64", "identity");
+ok($m->as_string, <<'EOT');
+Content-Encoding: deflate, base64, identity
+Content-Type: text/plain
+
+eJzzSM3JyVcozy/KSVEEAB0JBF4=
+EOT
+if (eval { require Encode; 1 }) {
+ ok($m->decoded_content, "Hello world!");
+} else {
+ skip('Needs Encode.pm for this test', undef);
+}
+
+# Raw RFC 1951 deflate
+$m = HTTP::Message->new([
+ "Content-Type" => "text/plain",
+ "Content-Encoding" => "deflate, base64",
+ ],
+ "80jNyclXCM8vyklRBAA="
+ );
+ok($m->decoded_content, "Hello World!");
+ok(!$m->header("Client-Warning"));
+
+
+if (eval "require IO::Uncompress::Bunzip2") {
+ $m = HTTP::Message->new([
+ "Content-Type" => "text/plain",
+ "Content-Encoding" => "x-bzip2, base64",
+ ],
+ "QlpoOTFBWSZTWcvLx0QAAAHVgAAQYAAAQAYEkIAgADEAMCBoYlnQeSEMvxdyRThQkMvLx0Q=\n"
+ );
+ ok($m->decoded_content, "Hello world!\n");
+ ok($m->decode);
+ ok($m->content, "Hello world!\n");
+
+ if (eval "require IO::Compress::Bzip2") {
+ $m = HTTP::Message->new([
+ "Content-Type" => "text/plain",
+ ],
+ "Hello world!"
+ );
+ ok($m->encode("x-bzip2"));
+ ok($m->header("Content-Encoding"), "x-bzip2");
+ ok($m->content =~ /^BZh.*\0/);
+ ok($m->decoded_content, "Hello world!");
+ ok($m->decode);
+ ok($m->content, "Hello world!");
+ }
+ else {
+ skip("Need IO::Compress::Bzip2", undef) for 1..6;
+ }
+}
+else {
+ skip("Need IO::Uncompress::Bunzip2", undef) for 1..9;
+}
+
+# test decoding of XML content
+if ($] >= 5.008001) {
+ $m = HTTP::Message->new(["Content-Type", "application/xml"], "\xFF\xFE<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0\"\x001\0.\x000\0\"\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0\"\0U\0T\0F\0-\x001\x006\0l\0e\0\"\0?\0>\0\n\0<\0r\0o\0o\0t\0>\0\xC9\0r\0i\0c\0<\0/\0r\0o\0o\0t\0>\0\n\0");
+ ok($m->decoded_content, "<?xml version=\"1.0\"?>\n<root>\xC9ric</root>\n");
+}
+else {
+ skip("Need perl-5.8", undef) for 1..1;
+}
--- /dev/null
+#!perl -w
+
+use Test;
+plan tests => 5;
+
+use HTTP::Request;
+use HTTP::Negotiate;
+
+
+ # ID QS Content-Type Encoding Char-Set Lang Size
+ $variants =
+ [
+ ['var1', 0.950, 'text/plain', ['uuencode',
+ 'compress'], 'iso-8859-2', 'se', 400],
+ ['var2', 1.000, 'text/html;version=2.0', 'gzip', 'iso-8859-1', 'en', 3000],
+ ['var3', 0.333, 'image/gif', undef, undef, undef, 43555],
+ ];
+
+
+# First we try a request with not accept headers
+$request = new HTTP::Request 'GET', 'http://localhost/';
+@a = choose($variants, $request);
+show_res(@a);
+expect(\@a, [['var2' => 1],
+ ['var1' => 0.95],
+ ['var3' => 0.333]
+ ]
+);
+
+
+$a = choose($variants, $request);
+print "The chosen one is '$a'\n";
+ok($a, "var2");
+
+#------------------
+
+$request = new HTTP::Request 'GET', 'http://localhost/';
+$request->header('Accept', 'text/plain; q=0.55, image/gif; mbx=10000');
+$request->push_header('Accept', 'text/*; q=0.25');
+$request->header('Accept-Language', 'no, en');
+$request->header('Accept-Charset', 'iso-8859-1');
+$request->header('Accept-Encoding', 'gzip');
+
+@a = choose($variants, $request);
+show_res(@a);
+expect(\@a, [['var2' => 0.25],
+ ['var1' => 0],
+ ['var3' => 0]
+ ]
+);
+
+$variants = [
+ ['var-en', undef, 'text/html', undef, undef, 'en', undef],
+ ['var-de', undef, 'text/html', undef, undef, 'de', undef],
+ ['var-ES', undef, 'text/html', undef, undef, 'ES', undef],
+ ['provoke-warning', undef, undef, undef, undef, 'x-no-content-type', undef],
+ ];
+
+$HTTP::Negotiate::DEBUG=1;
+$ENV{HTTP_ACCEPT_LANGUAGE}='DE,en,fr;Q=0.5,es;q=0.1';
+
+$a = choose($variants);
+
+ok($a, 'var-de');
+
+
+$variants = [
+ [ 'Canadian English' => 1.0, 'text/html', undef, undef, 'en-CA', undef ],
+ [ 'Generic English' => 1.0, 'text/html', undef, undef, 'en', undef ],
+ [ 'Non-Specific' => 1.0, 'text/html', undef, undef, undef, undef ],
+];
+
+$ENV{HTTP_ACCEPT_LANGUAGE}='en-US';
+$a = choose($variants);
+ok($a, 'Generic English');
+
+#------------------
+
+sub expect
+{
+ my($res, $exp) = @_;
+ do {
+ $a = shift @$res;
+ $b = shift @$exp;
+ last if defined($a) ne defined($b);
+ if (defined($a)) {
+ ($va, $qa) = @$a;
+ ($vb, $qb) = @$b;
+ if ($va ne $vb) {
+ print "$va == $vb ?\n";
+ ok(0);
+ return;
+ }
+ if (abs($qa - $qb) > 0.002) {
+ print "$qa ~= $qb ?\n";
+ ok(0);
+ return;
+ }
+ }
+
+ } until (!defined($a) || !defined($b));
+ ok(defined($a), defined($b));
+}
+
+sub show_res
+{
+ print "-------------\n";
+ for (@_) {
+ printf "%-6s %.3f\n", @$_;
+ }
+ print "-------------\n";
+}
--- /dev/null
+use Test;
+plan tests => 6;
+
+use LWP::UserAgent;
+$ua = LWP::UserAgent->new();
+
+$ua->protocols_forbidden(['hTtP']);
+ok(scalar(@{$ua->protocols_forbidden()}), 1);
+ok(@{$ua->protocols_forbidden()}[0], 'hTtP');
+
+$response = $ua->get('http://www.cpan.org/');
+ok($response->is_error());
+ok(!$ua->is_protocol_supported('http'));
+ok(!$ua->protocols_allowed());
+
+$ua->protocols_forbidden(undef);
+ok(!$ua->protocols_forbidden());
--- /dev/null
+# Test extra HTTP::Request methods. Basic operation is tested in the
+# message.t test suite.
+
+use strict;
+
+use Test;
+plan tests => 11;
+
+use HTTP::Request;
+
+my $req = HTTP::Request->new(GET => "http://www.example.com");
+$req->accept_decodable;
+
+ok($req->method, "GET");
+ok($req->uri, "http://www.example.com");
+ok($req->header("Accept-Encoding") =~ /\bgzip\b/); # assuming IO::Uncompress::Gunzip is there
+
+$req->dump(prefix => "# ");
+
+ok($req->method("DELETE"), "GET");
+ok($req->method, "DELETE");
+
+ok($req->uri("http:"), "http://www.example.com");
+ok($req->uri, "http:");
+
+$req->protocol("HTTP/1.1");
+
+my $r2 = HTTP::Request->parse($req->as_string);
+ok($r2->method, "DELETE");
+ok($r2->uri, "http:");
+ok($r2->protocol, "HTTP/1.1");
+ok($r2->header("Accept-Encoding"), $req->header("Accept-Encoding"));
--- /dev/null
+#!perl -w
+
+# Test extra HTTP::Response methods. Basic operation is tested in the
+# message.t test suite.
+
+use strict;
+use Test;
+plan tests => 23;
+
+use HTTP::Date;
+use HTTP::Request;
+use HTTP::Response;
+
+my $time = time;
+
+my $req = HTTP::Request->new(GET => 'http://www.sn.no');
+$req->date($time - 30);
+
+my $r = new HTTP::Response 200, "OK";
+$r->client_date($time - 20);
+$r->date($time - 25);
+$r->last_modified($time - 5000000);
+$r->request($req);
+
+#print $r->as_string;
+
+my $current_age = $r->current_age;
+
+ok($current_age >= 35 && $current_age <= 40);
+
+my $freshness_lifetime = $r->freshness_lifetime;
+ok($freshness_lifetime >= 12 * 3600);
+ok($r->freshness_lifetime(heuristic_expiry => 0), undef);
+
+my $is_fresh = $r->is_fresh;
+ok($is_fresh);
+ok($r->is_fresh(heuristic_expiry => 0), undef);
+
+print "# current_age = $current_age\n";
+print "# freshness_lifetime = $freshness_lifetime\n";
+print "# response is ";
+print " not " unless $is_fresh;
+print "fresh\n";
+
+print "# it will be fresh for ";
+print $freshness_lifetime - $current_age;
+print " more seconds\n";
+
+# OK, now we add an Expires header
+$r->expires($time);
+print "\n", $r->dump(prefix => "# ");
+
+$freshness_lifetime = $r->freshness_lifetime;
+ok($freshness_lifetime, 25);
+$r->remove_header('expires');
+
+# Now we try the 'Age' header and the Cache-Contol:
+$r->header('Age', 300);
+$r->push_header('Cache-Control', 'junk');
+$r->push_header(Cache_Control => 'max-age = 10');
+
+#print $r->as_string;
+
+$current_age = $r->current_age;
+$freshness_lifetime = $r->freshness_lifetime;
+
+print "# current_age = $current_age\n";
+print "# freshness_lifetime = $freshness_lifetime\n";
+
+ok($current_age >= 300);
+ok($freshness_lifetime, 10);
+
+ok($r->fresh_until); # should return something
+ok($r->fresh_until(heuristic_expiry => 0)); # should return something
+
+my $r2 = HTTP::Response->parse($r->as_string);
+my @h = $r2->header('Cache-Control');
+ok(@h, 2);
+
+$r->remove_header("Cache-Control");
+
+ok($r->fresh_until); # should still return something
+ok($r->fresh_until(heuristic_expiry => 0), undef);
+
+ok($r->redirects, 0);
+$r->previous($r2);
+ok($r->previous, $r2);
+ok($r->redirects, 1);
+
+$r2->previous($r->clone);
+ok($r->redirects, 2);
+for ($r->redirects) {
+ ok($_->is_success);
+}
+
+ok($r->base, $r->request->uri);
+$r->push_header("Content-Location", "/1/A/a");
+ok($r->base, "http://www.sn.no/1/A/a");
+$r->push_header("Content-Base", "/2/;a=/foo/bar");
+ok($r->base, "http://www.sn.no/2/;a=/foo/bar");
+$r->push_header("Content-Base", "/3/");
+ok($r->base, "http://www.sn.no/2/;a=/foo/bar");
--- /dev/null
+#!perl -w
+
+use Test;
+plan tests => 8;
+
+use HTTP::Status;
+
+ok(RC_OK, 200);
+
+ok(is_info(RC_CONTINUE));
+ok(is_success(RC_ACCEPTED));
+ok(is_error(RC_BAD_REQUEST));
+ok(is_redirect(RC_MOVED_PERMANENTLY));
+
+ok(!is_success(RC_NOT_FOUND));
+
+ok(status_message(0), undef);
+ok(status_message(200), "OK");
--- /dev/null
+#!perl -w
+
+use Test;
+plan tests => 8;
+
+use HTTP::Status qw(:constants :is status_message);
+
+ok(HTTP_OK, 200);
+
+ok(is_info(HTTP_CONTINUE));
+ok(is_success(HTTP_ACCEPTED));
+ok(is_error(HTTP_BAD_REQUEST));
+ok(is_redirect(HTTP_MOVED_PERMANENTLY));
+
+ok(!is_success(HTTP_NOT_FOUND));
+
+ok(status_message(0), undef);
+ok(status_message(200), "OK");
--- /dev/null
+#!perl -w
+
+use strict;
+use Test;
+
+plan tests => 14;
+
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new;
+my $clone = $ua->clone;
+
+ok($ua->agent =~ /^libwww-perl/);
+ok(!defined $ua->proxy(ftp => "http://www.sol.no"));
+ok($ua->proxy("ftp"), "http://www.sol.no");
+
+my @a = $ua->proxy([qw(ftp http wais)], "http://proxy.foo.com");
+for (@a) { $_ = "undef" unless defined; }
+
+ok("@a", "http://www.sol.no undef undef");
+ok($ua->proxy("http"), "http://proxy.foo.com");
+ok(ref($ua->default_headers), "HTTP::Headers");
+
+$ua->default_header("Foo" => "bar", "Multi" => [1, 2]);
+ok($ua->default_headers->header("Foo"), "bar");
+ok($ua->default_header("Foo"), "bar");
+
+# Try it
+$ua->proxy(http => "loopback:");
+$ua->agent("foo/0.1");
+
+ok($ua->get("http://www.example.com", x => "y")->content, <<EOT);
+GET http://www.example.com
+User-Agent: foo/0.1
+Foo: bar
+Multi: 1
+Multi: 2
+X: y
+
+EOT
+
+ok(ref($clone->{proxy}), 'HASH');
+
+ok($ua->proxy(http => undef), "loopback:");
+ok($ua->proxy('http'), undef);
+
+my $res = $ua->get("data:text/html,%3Chtml%3E%3Chead%3E%3Cmeta%20http-equiv%3D%22Content-Script-Type%22%20content%3D%22text%2Fjavascript%22%3E%3Cmeta%20http-equiv%3D%22Content-Style-Type%22%20content%3D%22text%2Fcss%22%3E%3C%2Fhead%3E%3C%2Fhtml%3E");
+ok($res->header("Content-Style-Type", "text/css"));
+ok($res->header("Content-Script-Type", "text/javascript"));
--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use lib '.';
+use Test::More tests => 12;
+use HTML::Form;
+
+my $html = do { local $/ = undef; <DATA> };
+my $form = HTML::Form->parse($html, 'foo.html' );
+isa_ok($form, 'HTML::Form');
+my $input = $form->find_input('passwd');
+isa_ok($input, 'HTML::Form::TextInput');
+
+sub set_value {
+ my $input = shift;
+ my $value = shift;
+ my $len = length($value);
+ my $old = $input->value;
+ is( $input->value($value), $old, "set value length=$len" );
+ is( $input->value, $value, "got value length=$len" );
+}
+
+{
+ is( $input->{maxlength}, 8, 'got maxlength: 8' );
+
+ set_value( $input, '1234' );
+ set_value( $input, '1234567890' );
+ ok(!$input->strict, "not strict by default");
+ $form->strict(1);
+ ok($input->strict, "input strict change when form strict change");
+ set_value( $input, '1234' );
+ eval {
+ set_value( $input, '1234567890' );
+ };
+ like($@, qr/^Input 'passwd' has maxlength '8' at /, "Exception raised");
+}
+
+__DATA__
+
+<form method="post" action="?" enctype="application/x-www-form-urlencoded" name="login">
+<div style="display:none"><input type="hidden" name="node_id" value="109"></div>
+<input type="hidden" name="op" value="login" />
+<input type="hidden" name="lastnode_id" value="109" />
+<table border="0"><tr><td><font size="2">
+Login:</font></td><td>
+<input type="text" name="user" size=10 maxlength=34 />
+</td></tr><tr><td><font size="2">
+Password</font></td><td>
+<input type="password" name="passwd" size=10 MAXLENGTH=8 />
+
+</td></tr></table><font size="2">
+<input type="checkbox" name="expires" value="+10y" />remember me
+<input type="submit" name="login" value="Login" />
+</font><br />
+<a href="?node=What%27s%20my%20password%3F">password reminder</a>
+<br />
+<a href="?node_id=101">Create A New User</a>
+</form>
+
--- /dev/null
+#!/usr/bin/perl
+
+# Test for case when multiple forms are on a page with same-named <select> fields.
+
+use strict;
+use Test::More tests => 2;
+use HTML::Form;
+
+{
+ my $test = "the settings of a previous form should not interfere with a latter form (control test with one form)";
+ my @forms = HTML::Form->parse( FakeResponse::One->new );
+ my $cat_form = $forms[0];
+ my @vals = $cat_form->param('age');
+ is_deeply(\@vals,[''], $test);
+}
+{
+ my $test = "the settings of a previous form should not interfere with a latter form (test with two forms)";
+ my @forms = HTML::Form->parse( FakeResponse::TwoForms->new );
+ my $cat_form = $forms[1];
+
+ my @vals = $cat_form->param('age');
+ is_deeply(\@vals,[''], $test);
+}
+
+####
+package FakeResponse::One;
+sub new {
+ bless {}, shift;
+}
+sub base {
+ return "http://foo.com"
+}
+sub content_charset {
+ return "iso-8859-1";
+}
+sub decoded_content {
+ my $html = qq{
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+ <html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title></title>
+ </head>
+ <body>
+
+ <form name="search_cats">
+ <select name="age" onChange="jumpTo(this)" class="sap-form-item">
+ <option value="" selected="selected">Any</option>
+ <option value="young">Young</option>
+ <option value="adult">Adult</option>
+ <option value="senior">Senior</option>
+ <option value="puppy">Puppy </option>
+ </select>
+ </form>
+ </body></html>
+ };
+ return \$html;
+}
+
+#####
+package FakeResponse::TwoForms;
+sub new {
+ bless {}, shift;
+}
+sub base {
+ return "http://foo.com"
+}
+sub decoded_content {
+ my $html = qq{
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+ <html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title></title>
+ </head>
+ <body>
+ <form name="search_dogs" >
+ <select name="age" onChange="jumpTo(this)" class="sap-form-item">
+ <option value="" selected="selected">Any</option>
+ <option value="young">Young</option>
+ <option value="adult">Adult</option>
+ <option value="senior">Senior</option>
+ <option value="puppy">Puppy </option>
+ </select>
+ </form>
+
+
+ <form name="search_cats">
+ <select name="age" onChange="jumpTo(this)" class="sap-form-item">
+ <option value="" selected="selected">Any</option>
+ <option value="young">Young</option>
+ <option value="adult">Adult</option>
+ <option value="senior">Senior</option>
+ <option value="puppy">Puppy </option>
+ </select>
+ </form>
+ </body></html>
+ };
+ return \$html;
+}
--- /dev/null
+#!perl -w
+
+use strict;
+use Test qw(plan ok);
+
+plan tests => 22;
+
+use HTML::Form;
+
+my $form = HTML::Form->parse(<<"EOT", base => "http://example.com", strict => 1);
+<form>
+<input type="hidden" name="hidden_1">
+
+<input type="checkbox" name="checkbox_1" value="c1_v1" CHECKED>
+<input type="checkbox" name="checkbox_1" value="c1_v2" CHECKED>
+<input type="checkbox" name="checkbox_2" value="c2_v1" CHECKED>
+
+<select name="multi_select_field" multiple="1">
+ <option> 1
+ <option> 2
+ <option> 3
+</select>
+</form>
+EOT
+
+# list names
+ok($form->param, 4);
+ok(j($form->param), "hidden_1:checkbox_1:checkbox_2:multi_select_field");
+
+# get
+ok($form->param('hidden_1'), '');
+ok($form->param('checkbox_1'), 'c1_v1');
+ok(j($form->param('checkbox_1')), 'c1_v1:c1_v2');
+ok($form->param('checkbox_2'), 'c2_v1');
+ok(j($form->param('checkbox_2')), 'c2_v1');
+ok(!defined($form->param('multi_select_field')));
+ok(j($form->param('multi_select_field')), '');
+ok(!defined($form->param('unknown')));
+ok(j($form->param('unknown')), '');
+
+# set
+eval {
+ $form->param('hidden_1', 'x');
+};
+ok($@, qr/readonly/);
+ok(j($form->param('hidden_1')), '');
+
+eval {
+ $form->param('checkbox_1', 'foo');
+};
+ok($@, qr/Illegal value/);
+ok(j($form->param('checkbox_1')), 'c1_v1:c1_v2');
+
+$form->param('checkbox_1', 'c1_v2');
+ok(j($form->param('checkbox_1')), 'c1_v2');
+$form->param('checkbox_1', 'c1_v2');
+ok(j($form->param('checkbox_1')), 'c1_v2');
+$form->param('checkbox_1', []);
+ok(j($form->param('checkbox_1')), '');
+$form->param('checkbox_1', ['c1_v2', 'c1_v1']);
+ok(j($form->param('checkbox_1')), 'c1_v1:c1_v2');
+$form->param('checkbox_1', []);
+ok(j($form->param('checkbox_1')), '');
+$form->param('checkbox_1', 'c1_v2', 'c1_v1');
+ok(j($form->param('checkbox_1')), 'c1_v1:c1_v2');
+
+$form->param('multi_select_field', 3, 2);
+ok(j($form->param('multi_select_field')), "2:3");
+
+sub j {
+ join(":", @_);
+}
--- /dev/null
+#!perl -w
+
+use strict;
+use Test qw(plan ok);
+
+plan tests => 12;
+
+use HTML::Form;
+
+my $form = HTML::Form->parse(<<"EOT", base => "http://example.com", strict => 1);
+<form>
+<input name="n1" id="id1" class="A" value="1">
+<input id="id2" class="A" value="2">
+<input id="id3" class="B" value="3">
+<select id="id4">
+ <option>1
+ <option>2
+ <option>3
+</selector>
+<input id="#foo" name="#bar" class=".D" disabled>
+</form>
+EOT
+
+#$form->dump;
+
+ok($form->value("n1"), 1);
+ok($form->value("^n1"), 1);
+ok($form->value("#id1"), 1);
+ok($form->value(".A"), 1);
+ok($form->value("#id2"), 2);
+ok($form->value(".B"), 3);
+
+ok(j(map $_->value, $form->find_input(".A")), "1:2");
+
+$form->find_input("#id2")->name("n2");
+$form->value("#id2", 22);
+ok($form->click->uri->query, "n1=1&n2=22");
+
+# try some odd names
+ok($form->find_input("##foo")->name, "#bar");
+ok($form->find_input("#bar"), undef);
+ok($form->find_input("^#bar")->class, ".D");
+ok($form->find_input("..D")->id, "#foo");
+
+sub j {
+ join(":", @_);
+}
--- /dev/null
+#!perl -w
+
+use strict;
+use Test qw(plan ok);
+
+plan tests => 127;
+
+use HTML::Form;
+
+my @warn;
+$SIG{__WARN__} = sub { push(@warn, $_[0]) };
+
+my @f = HTML::Form->parse("", "http://localhost/");
+ok(@f, 0);
+
+@f = HTML::Form->parse(<<'EOT', "http://localhost/");
+<form action="abc" name="foo">
+<input name="name">
+</form>
+<form></form>
+EOT
+
+ok(@f, 2);
+
+my $f = shift @f;
+ok($f->value("name"), "");
+ok($f->dump, "GET http://localhost/abc [foo]\n name= (text)\n");
+
+my $req = $f->click;
+ok($req->method, "GET");
+ok($req->uri, "http://localhost/abc?name=");
+
+$f->value(name => "Gisle Aas");
+$req = $f->click;
+ok($req->method, "GET");
+ok($req->uri, "http://localhost/abc?name=Gisle+Aas");
+
+ok($f->attr("name"), "foo");
+ok($f->attr("method"), undef);
+
+$f = shift @f;
+ok($f->method, "GET");
+ok($f->action, "http://localhost/");
+ok($f->enctype, "application/x-www-form-urlencoded");
+ok($f->dump, "GET http://localhost/\n");
+
+# try some more advanced inputs
+$f = HTML::Form->parse(<<'EOT', base => "http://localhost/", verbose => 1);
+<form method=post>
+ <input name=i type="image" src="foo.gif">
+ <input name=c type="checkbox" checked>
+ <input name=r type="radio" value="a">
+ <input name=r type="radio" value="b" checked>
+ <input name=t type="text">
+ <input name=p type="PASSWORD">
+ <input name=h type="hidden" value=xyzzy>
+ <input name=s type="submit" value="Doit!">
+ <input name=r type="reset">
+ <input name=b type="button">
+ <input name=f type="file" value="foo.txt">
+ <input name=x type="xyzzy">
+
+ <textarea name=a>
+abc
+ </textarea>
+
+ <select name=s>
+ <option>Foo
+ <option value="bar" selected>Bar
+ </select>
+
+ <select name=m multiple>
+ <option selected value="a">Foo
+ <option selected value="b">Bar
+ </select>
+</form>
+EOT
+
+#print $f->dump;
+#print $f->click->as_string;
+
+ok($f->click->as_string, <<'EOT');
+POST http://localhost/
+Content-Length: 69
+Content-Type: application/x-www-form-urlencoded
+
+i.x=1&i.y=1&c=on&r=b&t=&p=&h=xyzzy&f=&x=&a=%0Aabc%0A+++&s=bar&m=a&m=b
+EOT
+
+ok(@warn, 1);
+ok($warn[0] =~ /^Unknown input type 'xyzzy'/);
+@warn = ();
+
+$f = HTML::Form->parse(<<'EOT', "http://localhost/");
+<form>
+ <input type=submit value="Upload it!" name=n disabled>
+ <input type=image alt="Foo">
+ <input type=text name=t value="1">
+</form>
+EOT
+
+#$f->dump;
+ok($f->click->as_string, <<'EOT');
+GET http://localhost/?x=1&y=1&t=1
+
+EOT
+
+# test file upload
+$f = HTML::Form->parse(<<'EOT', "http://localhost/");
+<form method=post enctype="MULTIPART/FORM-DATA">
+ <input name=f type=file value="/etc/passwd">
+ <input type=submit value="Upload it!">
+</form>
+EOT
+
+#print $f->dump;
+#print $f->click->as_string;
+
+ok($f->click->as_string, <<'EOT');
+POST http://localhost/
+Content-Length: 0
+Content-Type: multipart/form-data; boundary=none
+
+EOT
+
+my $filename = sprintf "foo-%08d.txt", $$;
+die if -e $filename;
+
+open(FILE, ">$filename") || die;
+binmode(FILE);
+print FILE "This is some text\n";
+close(FILE) || die;
+
+$f->value(f => $filename);
+
+#print $f->click->as_string;
+
+ok($f->click->as_string, <<"EOT");
+POST http://localhost/
+Content-Length: 139
+Content-Type: multipart/form-data; boundary=xYzZY
+
+--xYzZY\r
+Content-Disposition: form-data; name="f"; filename="$filename"\r
+Content-Type: text/plain\r
+\r
+This is some text
+\r
+--xYzZY--\r
+EOT
+
+unlink($filename) || warn "Can't unlink '$filename': $!";
+
+ok(@warn, 0);
+
+# Try to parse form HTTP::Response directly
+{
+ package MyResponse;
+ use vars qw(@ISA);
+ require HTTP::Response;
+ @ISA = ('HTTP::Response');
+
+ sub base { "http://www.example.com" }
+}
+my $response = MyResponse->new(200, 'OK');
+$response->content("<form><input type=text value=42 name=x></form>");
+
+$f = HTML::Form->parse($response);
+
+ok($f->click->as_string, <<"EOT");
+GET http://www.example.com?x=42
+
+EOT
+
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form>
+ <input type=checkbox name=x> I like it!
+</form>
+EOT
+
+$f->find_input("x")->check;
+
+ok($f->click->as_string, <<"EOT");
+GET http://www.example.com?x=on
+
+EOT
+
+$f->value("x", "off");
+ok($f->click->as_string, <<"EOT");
+GET http://www.example.com
+
+EOT
+
+$f->value("x", "I like it!");
+ok($f->click->as_string, <<"EOT");
+GET http://www.example.com?x=on
+
+EOT
+
+$f->value("x", "I LIKE IT!");
+ok($f->click->as_string, <<"EOT");
+GET http://www.example.com?x=on
+
+EOT
+
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form>
+<select name=x>
+ <option value=1>one
+ <option value=2>two
+ <option>3
+</select>
+<select name=y multiple>
+ <option value=1>
+</select>
+</form>
+EOT
+
+$f->value("x", "one");
+
+ok($f->click->as_string, <<"EOT");
+GET http://www.example.com?x=1
+
+EOT
+
+$f->value("x", "TWO");
+ok($f->click->as_string, <<"EOT");
+GET http://www.example.com?x=2
+
+EOT
+
+ok(join(":", $f->find_input("x")->value_names), "one:two:3");
+ok(join(":", map $_->name, $f->find_input(undef, "option")), "x:y");
+
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form>
+<input name=x value=1 disabled>
+<input name=y value=2 READONLY type=TEXT>
+<input name=z value=3 type=hidden>
+</form>
+EOT
+
+ok($f->value("x"), 1);
+ok($f->value("y"), 2);
+ok($f->value("z"), 3);
+ok($f->click->uri->query, "y=2&z=3");
+
+my $input = $f->find_input("x");
+ok($input->type, "text");
+ok(!$input->readonly);
+ok($input->disabled);
+ok($input->disabled(0));
+ok(!$input->disabled);
+ok($f->click->uri->query, "x=1&y=2&z=3");
+
+$input = $f->find_input("y");
+ok($input->type, "text");
+ok($input->readonly);
+ok(!$input->disabled);
+$input->value(22);
+ok($f->click->uri->query, "x=1&y=22&z=3");
+
+$input->strict(1);
+eval {
+ $input->value(23);
+};
+ok($@ =~ /^Input 'y' is readonly/);
+
+ok($input->readonly(0));
+ok(!$input->readonly);
+
+$input->value(222);
+ok(@warn, 0);
+ok($f->click->uri->query, "x=1&y=222&z=3");
+
+$input = $f->find_input("z");
+ok($input->type, "hidden");
+ok($input->readonly);
+ok(!$input->disabled);
+
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form>
+<textarea name="t" type="hidden">
+<foo>
+</textarea>
+<select name=s value=s>
+ <option name=y>Foo
+ <option name=x value=bar type=x>Bar
+</form>
+EOT
+
+ok($f->value("t"), "\n<foo>\n");
+ok($f->value("s"), "Foo");
+ok(join(":", $f->find_input("s")->possible_values), "Foo:bar");
+ok(join(":", $f->find_input("s")->other_possible_values), "bar");
+ok($f->value("s", "bar"), "Foo");
+ok($f->value("s"), "bar");
+ok(join(":", $f->find_input("s")->other_possible_values), "");
+
+
+$f = HTML::Form->parse(<<EOT, base => "http://www.example.com", strict => 1);
+<form>
+
+<input type=radio name=r0 value=1 disabled>one
+
+<input type=radio name=r1 value=1 disabled>one
+<input type=radio name=r1 value=2>two
+<input type=radio name=r1 value=3>three
+
+<input type=radio name=r2 value=1>one
+<input type=radio name=r2 value=2 disabled>two
+<input type=radio name=r2 value=3>three
+
+<select name=s0>
+ <option disabled>1
+</select>
+
+<select name=s1>
+ <option disabled>1
+ <option>2
+ <option>3
+</select>
+
+<select name=s2>
+ <option>1
+ <option disabled>2
+ <option>3
+</select>
+
+<select name=s3 disabled>
+ <option>1
+ <option disabled>2
+ <option>3
+</select>
+
+<select name=m0 multiple>
+ <option disabled>1
+</select>
+
+<select name=m1 multiple="">
+ <option disabled>1
+ <option>2
+ <option>3
+</select>
+
+<select name=m2 multiple>
+ <option>1
+ <option disabled>2
+ <option>3
+</select>
+
+<select name=m3 disabled multiple>
+ <option>1
+ <option disabled>2
+ <option>3
+</select>
+
+</form>
+
+EOT
+#print $f->dump;
+ok($f->find_input("r0")->disabled);
+ok(!eval {$f->value("r0", 1);});
+ok($@ && $@ =~ /^The value '1' has been disabled for field 'r0'/);
+ok($f->find_input("r0")->disabled(0));
+ok(!$f->find_input("r0")->disabled);
+ok($f->value("r0", 1), undef);
+ok($f->value("r0"), 1);
+
+ok(!$f->find_input("r1")->disabled);
+ok($f->value("r1", 2), undef);
+ok($f->value("r1"), 2);
+ok(!eval {$f->value("r1", 1);});
+ok($@ && $@ =~ /^The value '1' has been disabled for field 'r1'/);
+
+ok($f->value("r2", 1), undef);
+ok(!eval {$f->value("r2", 2);});
+ok($@ && $@ =~ /^The value '2' has been disabled for field 'r2'/);
+ok(!eval {$f->value("r2", "two");});
+ok($@ && $@ =~ /^The value 'two' has been disabled for field 'r2'/);
+ok(!$f->find_input("r2")->disabled(1));
+ok(!eval {$f->value("r2", 1);});
+ok($@ && $@ =~ /^The value '1' has been disabled for field 'r2'/);
+ok($f->find_input("r2")->disabled(0));
+ok(!$f->find_input("r2")->disabled);
+ok($f->value("r2", 2), 1);
+
+ok($f->find_input("s0")->disabled);
+ok(!$f->find_input("s1")->disabled);
+ok(!$f->find_input("s2")->disabled);
+ok($f->find_input("s3")->disabled);
+
+ok(!eval {$f->value("s1", 1);});
+ok($@ && $@ =~ /^The value '1' has been disabled for field 's1'/);
+
+ok($f->find_input("m0")->disabled);
+ok($f->find_input("m1", undef, 1)->disabled);
+ok(!$f->find_input("m1", undef, 2)->disabled);
+ok(!$f->find_input("m1", undef, 3)->disabled);
+
+ok(!$f->find_input("m2", undef, 1)->disabled);
+ok($f->find_input("m2", undef, 2)->disabled);
+ok(!$f->find_input("m2", undef, 3)->disabled);
+
+ok($f->find_input("m3", undef, 1)->disabled);
+ok($f->find_input("m3", undef, 2)->disabled);
+ok($f->find_input("m3", undef, 3)->disabled);
+
+$f->find_input("m3", undef, 2)->disabled(0);
+ok(!$f->find_input("m3", undef, 2)->disabled);
+ok($f->find_input("m3", undef, 2)->value(2), undef);
+ok($f->find_input("m3", undef, 2)->value(undef), 2);
+
+$f->find_input("m3", undef, 2)->disabled(1);
+ok($f->find_input("m3", undef, 2)->disabled);
+ok(eval{$f->find_input("m3", undef, 2)->value(2)}, undef);
+ok($@ && $@ =~ /^The value '2' has been disabled/);
+ok(eval{$f->find_input("m3", undef, 2)->value(undef)}, undef);
+ok($@ && $@ =~ /^The 'm3' field can't be unchecked/);
+
+# multiple select with the same name [RT#18993]
+$f = HTML::Form->parse(<<EOT, "http://localhost/");
+<form action="target.html" method="get">
+<select name="bug">
+<option selected value=hi>hi
+<option value=mom>mom
+</select>
+<select name="bug">
+<option value=hi>hi
+<option selected value=mom>mom
+</select>
+<select name="nobug">
+<option value=hi>hi
+<option selected value=mom>mom
+</select>
+EOT
+ok(join("|", $f->form), "bug|hi|bug|mom|nobug|mom");
+
+# Try a disabled radiobutton:
+$f = HTML::Form->parse(<<EOT, "http://localhost/");
+<form>
+ <input disabled checked type=radio name=f value=a>
+ <input type=hidden name=f value=b>
+</form>
+
+EOT
+
+ok($f->click->as_string, <<'EOT');
+GET http://localhost/?f=b
+
+EOT
+
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<!-- from http://www.blooberry.com/indexdot/html/tagpages/k/keygen.htm -->
+<form METHOD="post" ACTION="http://example.com/secure/keygen/test.cgi" ENCTYPE="application/x-www-form-urlencoded">
+ <keygen NAME="randomkey" CHALLENGE="1234567890">
+ <input TYPE="text" NAME="Field1" VALUE="Default Text">
+</form>
+EOT
+
+ok($f->find_input("randomkey"));
+ok($f->find_input("randomkey")->challenge, "1234567890");
+ok($f->find_input("randomkey")->keytype, "rsa");
+ok($f->click->as_string, <<EOT);
+POST http://example.com/secure/keygen/test.cgi
+Content-Length: 19
+Content-Type: application/x-www-form-urlencoded
+
+Field1=Default+Text
+EOT
+
+$f->value(randomkey => "foo");
+ok($f->click->as_string, <<EOT);
+POST http://example.com/secure/keygen/test.cgi
+Content-Length: 33
+Content-Type: application/x-www-form-urlencoded
+
+randomkey=foo&Field1=Default+Text
+EOT
+
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form ACTION="http://example.com/">
+ <select name=s>
+ <option>1
+ <option>2
+ <input name=t>
+</form>
+EOT
+
+ok($f);
+ok($f->find_input("t"));
+
+
+@f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form ACTION="http://example.com/">
+ <select name=s>
+ <option>1
+ <option>2
+</form>
+<form ACTION="http://example.com/">
+ <input name=t>
+</form>
+EOT
+
+ok(@f, 2);
+ok($f[0]->find_input("s"));
+ok($f[1]->find_input("t"));
+
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form ACTION="http://example.com/">
+ <fieldset>
+ <legend>Radio Buttons with Labels</legend>
+ <label>
+ <input type=radio name=r0 value=0 />zero
+ </label>
+ <label>one
+ <input type=radio name=r1 value=1>
+ </label>
+ <label for="r2">two</label>
+ <input type=radio name=r2 id=r2 value=2>
+ <label>
+ <span>nested</span>
+ <input type=radio name=r3 value=3>
+ </label>
+ <label>
+ before
+ and <input type=radio name=r4 value=4>
+ after
+ </label>
+ </fieldset>
+</form>
+EOT
+
+ok(join(":", $f->find_input("r0")->value_names), "zero");
+ok(join(":", $f->find_input("r1")->value_names), "one");
+ok(join(":", $f->find_input("r2")->value_names), "two");
+ok(join(":", $f->find_input("r3")->value_names), "nested");
+ok(join(":", $f->find_input("r4")->value_names), "before and after");
+
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form>
+ <table>
+ <TR>
+ <TD align="left" colspan="2">
+ Keep me informed on the progress of this election
+ <INPUT type="checkbox" id="keep_informed" name="keep_informed" value="yes" checked>
+ </TD>
+ </TR>
+ <TR>
+ <TD align=left colspan=2>
+ <BR><B>The place you are registered to vote:</B>
+ </TD>
+ </TR>
+ <TR>
+ <TD valign="middle" height="2" align="right">
+ <A name="Note1back">County or Parish</A>
+ </TD>
+ <TD align="left">
+ <INPUT type="text" id="reg_county" size="40" name="reg_county" value="">
+ </TD>
+ <TD align="left" width="10">
+ <A href="#Note2" class="c2" tabindex="-1">Note 2</A>
+ </TD>
+ </TR>
+ </table>
+</form>
+EOT
+ok(join(":", $f->find_input("keep_informed")->value_names), "off:");
+
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form action="test" method="post">
+<select name="test">
+<option value="1">One</option>
+<option value="2">Two</option>
+<option disabled="disabled" value="3">Three</option>
+</select>
+<input type="submit" name="submit" value="Go">
+</form>
+</body>
+</html>
+EOT
+ok(join(":", $f->find_input("test")->possible_values), "1:2");
+ok(join(":", $f->find_input("test")->other_possible_values), "2");
+
+@warn = ();
+$f = HTML::Form->parse(<<EOT, "http://www.example.com");
+<form>
+<select id="myselect">
+<option>one</option>
+<option>two</option>
+<option>three</option>
+</select>
+</form>
+EOT
+ok(@warn, 0);
--- /dev/null
+#!perl -w
+
+use Test;
+
+use strict;
+use File::Listing;
+use LWP::Simple;
+
+# some sample URLs
+my @urls = (
+ "http://www.apache.org/dist/apr/?C=N&O=D",
+ "http://perl.apache.org/rpm/distrib/",
+ "http://www.cpan.org/modules/by-module/",
+ );
+plan tests => scalar(@urls);
+
+for my $url (@urls) {
+ print "# $url\n";
+ my $dir = get($url);
+ unless ($dir) {
+ print "# Can't get document at $url\n";
+ ok(0);
+ next;
+ }
+ my @listing = parse_dir($dir, undef, "apache");
+ ok(@listing);
+}
--- /dev/null
+#!perl -w
+
+use strict;
+use Test;
+plan tests => 6;
+
+use Net::HTTP;
+
+
+my $s = Net::HTTP->new(Host => "www.apache.org",
+ KeepAlive => 1,
+ Timeout => 15,
+ PeerHTTPVersion => "1.1",
+ MaxLineLength => 512) || die "$@";
+
+for (1..2) {
+ $s->write_request(TRACE => "/libwww-perl",
+ 'User-Agent' => 'Mozilla/5.0',
+ 'Accept-Language' => 'no,en',
+ Accept => '*/*');
+
+ my($code, $mess, %h) = $s->read_response_headers;
+ print "# $code $mess\n";
+ for (sort keys %h) {
+ print "# $_: $h{$_}\n";
+ }
+ print "\n";
+
+ ok($code, "200");
+ ok($h{'Content-Type'}, "message/http");
+
+ my $buf;
+ while (1) {
+ my $tmp;
+ my $n = $s->read_entity_body($tmp, 20);
+ last unless $n;
+ $buf .= $tmp;
+ }
+ $buf =~ s/\r//g;
+
+ ok($buf, <<EOT);
+TRACE /libwww-perl HTTP/1.1
+Host: www.apache.org
+User-Agent: Mozilla/5.0
+Accept-Language: no,en
+Accept: */*
+
+EOT
+}
+
--- /dev/null
+#!perl -w
+
+use strict;
+use Test;
+
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new();
+my $res = $ua->simple_request(HTTP::Request->new(GET => "https://www.apache.org"));
+
+if ($res->code == 501 && $res->message =~ /Protocol scheme 'https' is not supported/) {
+ print "1..0 # Skipped: " . $res->message . "\n";
+ exit;
+}
+
+plan tests => 2;
+ok($res->is_success);
+ok($res->content =~ /Apache Software Foundation/);
+
+$res->dump(prefix => "# ");
--- /dev/null
+use strict;
+use Test;
+
+plan tests => 5;
+
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new(keep_alive => 1);
+
+my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/Basic/");
+
+my $res = $ua->request($req);
+
+#print $res->as_string;
+
+ok($res->code, 401);
+
+$req->authorization_basic('guest', 'guest');
+$res = $ua->simple_request($req);
+
+print $req->as_string, "\n";
+
+#print $res->as_string;
+ok($res->code, 200);
+ok($res->content =~ /Your browser made it!/);
+
+{
+ package MyUA;
+ use vars qw(@ISA);
+ @ISA = qw(LWP::UserAgent);
+
+ my @try = (['foo', 'bar'], ['', ''], ['guest', ''], ['guest', 'guest']);
+
+ sub get_basic_credentials {
+ my($self,$realm, $uri, $proxy) = @_;
+ #print "$realm/$uri/$proxy\n";
+ my $p = shift @try;
+ #print join("/", @$p), "\n";
+ return @$p;
+ }
+
+}
+
+$ua = MyUA->new(keep_alive => 1);
+
+$req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/Basic/");
+$res = $ua->request($req);
+
+#print $res->as_string;
+
+ok($res->content =~ /Your browser made it!/);
+ok($res->header("Client-Response-Num"), 5);
+
--- /dev/null
+use strict;
+use Test;
+
+plan tests => 2;
+
+use LWP::UserAgent;
+
+{
+ package MyUA;
+ use vars qw(@ISA);
+ @ISA = qw(LWP::UserAgent);
+
+ my @try = (['foo', 'bar'], ['', ''], ['guest', ''], ['guest', 'guest']);
+
+ sub get_basic_credentials {
+ my($self,$realm, $uri, $proxy) = @_;
+ print "$realm:$uri:$proxy => ";
+ my $p = shift @try;
+ print join("/", @$p), "\n";
+ return @$p;
+ }
+
+}
+
+my $ua = MyUA->new(keep_alive => 1);
+
+my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/Digest/");
+my $res = $ua->request($req);
+
+#print $res->as_string;
+
+ok($res->content =~ /Your browser made it!/);
+ok($res->header("Client-Response-Num"), 5);
--- /dev/null
+print "1..5\n";
+
+use strict;
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new(keep_alive => 1);
+
+my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/ChunkedScript");
+my $res = $ua->request($req);
+
+print "not " unless $res->is_success && $res->content_type eq "text/plain";
+print "ok 1\n";
+
+print "not " unless $res->header("Client-Transfer-Encoding") eq "chunked";
+print "ok 2\n";
+
+for (${$res->content_ref}) {
+ s/\015?\012/\n/g;
+ /Below this line, is 1000 repeated lines of 0-9/ || die;
+ s/^.*?-----+\n//s;
+
+ my @lines = split(/^/);
+ print "not " if @lines != 1000;
+ print "ok 3\n";
+
+ # check that all lines are the same
+ my $first = shift(@lines);
+ my $no_they_are_not;
+ for (@lines) {
+ $no_they_are_not++ if $_ ne $first;
+ }
+ print "not " if $no_they_are_not;
+ print "ok 4\n";
+
+ print "not " unless $first =~ /^\d+$/;
+ print "ok 5\n";
+}
--- /dev/null
+print "1..2\n";
+
+use strict;
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new(keep_alive => 1);
+
+my $res = $ua->get(
+ "http://jigsaw.w3.org/HTTP/h-content-md5.html",
+ "TE" => "deflate",
+);
+
+use Digest::MD5 qw(md5_base64);
+print "not " unless $res->header("Content-MD5") eq md5_base64($res->content) . "==";
+print "ok 1\n";
+
+print $res->as_string;
+
+my $etag = $res->header("etag");
+
+$res = $ua->get(
+ "http://jigsaw.w3.org/HTTP/h-content-md5.html",
+ "TE" => "deflate",
+ "If-None-Match" => $etag,
+);
+print $res->as_string;
+
+print "not " unless $res->code eq "304" && $res->header("Client-Response-Num") == 2;
+print "ok 2\n";
--- /dev/null
+print "1..2\n";
+
+use strict;
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new(keep_alive => 1);
+
+my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/h-content-md5.html");
+$req->header("TE", "deflate");
+
+my $res = $ua->request($req);
+
+use Digest::MD5 qw(md5_base64);
+print "not " unless $res->header("Content-MD5") eq md5_base64($res->content) . "==";
+print "ok 1\n";
+
+print $res->as_string;
+
+my $etag = $res->header("etag");
+$req->header("If-None-Match" => $etag);
+
+$res = $ua->request($req);
+print $res->as_string;
+
+print "not " unless $res->code eq "304" && $res->header("Client-Response-Num") == 2;
+print "ok 2\n";
--- /dev/null
+print "1..1\n";
+
+use strict;
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new(keep_alive => 1);
+
+my $res = $ua->get(
+ "http://jigsaw.w3.org/HTTP/neg",
+ Connection => "close",
+);
+
+print $res->as_string, "\n";
+
+print "not " unless $res->code == 300;
+print "ok 1\n";
--- /dev/null
+print "1..1\n";
+
+use strict;
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new(keep_alive => 1);
+
+my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/neg");
+$req->header(Connection => "close");
+my $res = $ua->request($req);
+
+print $res->as_string, "\n";
+
+print "not " unless $res->code == 300;
+print "ok 1\n";
--- /dev/null
+#!perl -w
+
+print "1..4\n";
+
+use strict;
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new(keep_alive => 1);
+
+
+my $content;
+my $testno = 1;
+
+for my $te (undef, "", "deflate", "gzip", "trailers, deflate;q=0.4, identity;q=0.1") {
+ my $req = HTTP::Request->new(GET => "http://jigsaw.w3.org/HTTP/TE/foo.txt");
+ if (defined $te) {
+ $req->header(TE => $te);
+ $req->header(Connection => "TE");
+ }
+ print $req->as_string;
+
+ my $res = $ua->request($req);
+ if (defined $content) {
+ print "not " unless $content eq $res->content;
+ print "ok $testno\n\n";
+ $testno++;
+ }
+ else {
+ $content = $res->content;
+ }
+ $res->content("");
+ print $res->as_string;
+}
--- /dev/null
+#
+# See if autoloading of protocol schemes work
+#
+
+print "1..1\n";
+
+require LWP::UserAgent;
+# note no LWP::Protocol::file;
+
+$url = "file:.";
+
+require URI;
+print "Trying to fetch '" . URI->new($url)->file . "'\n";
+
+my $ua = new LWP::UserAgent; # create a useragent to test
+$ua->timeout(30); # timeout in seconds
+
+my $response = $ua->get($url);
+if ($response->is_success) {
+ print "ok 1\n";
+ print $response->as_string;
+}
+else {
+ print "not ok 1\n";
+ print $response->error_as_HTML;
+}
--- /dev/null
+#
+# See if autoloading of protocol schemes work
+#
+
+use Test;
+plan tests => 1;
+
+require LWP::UserAgent;
+# note no LWP::Protocol::file;
+
+$url = "file:.";
+
+require URI;
+print "Trying to fetch '" . URI->new($url)->file . "'\n";
+
+my $ua = new LWP::UserAgent; # create a useragent to test
+$ua->timeout(30); # timeout in seconds
+
+my $request = HTTP::Request->new(GET => $url);
+
+my $response = $ua->request($request);
+ok($response->is_success);
--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use Config;
+use HTTP::Daemon;
+use Test::More;
+# use Time::HiRes qw(sleep);
+our $CRLF;
+use Socket qw($CRLF);
+
+our $LOGGING = 0;
+
+our @TESTS = (
+ {
+ expect => 629,
+ comment => "traditional, unchunked POST request",
+ raw => "POST /cgi-bin/redir-TE.pl HTTP/1.1
+User-Agent: UNTRUSTED/1.0
+Content-Type: application/x-www-form-urlencoded
+Content-Length: 629
+Host: localhost
+
+JSR-205=0;font_small=15;png=1;jpg=1;alpha_channel=256;JSR-82=0;JSR-135=1;mot-wt=0;JSR-75-pim=0;pointer_motion_event=0;camera=1;free_memory=455472;heap_size=524284;cldc=CLDC-1.1;canvas_size_y=176;canvas_size_x=176;double_buffered=1;color=65536;JSR-120=1;JSR-184=1;JSR-180=0;JSR-75-file=0;push_socket=0;pointer_event=0;nokia-ui=1;java_platform=xxxxxxxxxxxxxxxxx/xxxxxxx;gif=1;midp=MIDP-1.0 MIDP-2.0;font_large=22;sie-col-game=0;JSR-179=0;push_sms=1;JSR-172=0;font_medium=18;fullscreen_canvas_size_y=220;fullscreen_canvas_size_x=176;java_locale=de;video_encoding=encoding=JPEG&width=176&height=182encoding=JPEG&width=176&height=220;"
+ },
+ {
+ expect => 8,
+ comment => "chunked with illegal Content-Length header; tiny message",
+ raw => "POST /cgi-bin/redir-TE.pl HTTP/1.1
+Host: localhost
+Content-Type: application/x-www-form-urlencoded
+Content-Length: 8
+Transfer-Encoding: chunked
+
+8
+icm.x=u2
+0
+
+",
+ },
+ {
+ expect => 868,
+ comment => "chunked with illegal Content-Length header; medium sized",
+ raw => "POST /cgi-bin/redir-TE.pl HTTP/1.1
+Host:dev05
+Connection:close
+Content-Type:application/x-www-form-urlencoded
+Content-Length:868
+transfer-encoding:chunked
+
+364
+JSR-205=0;font_small=20;png=1;jpg=1;JSR-82=0;JSR-135=1;mot-wt=0;JSR-75-pim=0;http=1;pointer_motion_event=0;browser_launch=1;free_memory=733456;user_agent=xxxxxxxxx/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;heap_size=815080;cldc=CLDC-1.0;canvas_size_y=182;canvas_size_x=176;double_buffered=1;NAVIGATION PRESS=20;JSR-184=0;JSR-120=1;color=32768;JSR-180=0;JSR-75-file=0;RIGHT SOFT KEY=22;NAVIGATION RIGHT=5;KEY *=42;push_socket=0;pointer_event=0;KEY #=35;KEY NUM 9=57;nokia-ui=0;KEY NUM 8=56;KEY NUM 7=55;KEY NUM 6=54;KEY NUM 5=53;gif=1;KEY NUM 4=52;NAVIGATION UP=1;KEY NUM 3=51;KEY NUM 2=50;KEY NUM 1=49;midp=MIDP-2.0 VSCL-1.1.0;font_large=20;KEY NUM 0=48;sie-col-game=0;JSR-179=0;push_sms=1;JSR-172=0;NAVIGATION LEFT=2;LEFT SOFT KEY=21;font_medium=20;fullscreen_canvas_size_y=204;fullscreen_canvas_size_x=176;https=1;NAVIGATION DOWN=6;java_locale=en-DE;
+0
+
+",
+ },
+ {
+ expect => 1104,
+ comment => "chunked correctly, size ~1k; base for the big next test",
+ raw => "POST /cgi-bin/redir-TE.pl HTTP/1.1
+User-Agent: UNTRUSTED/1.0
+Content-Type: application/x-www-form-urlencoded
+Host: localhost:80
+Transfer-Encoding: chunked
+
+450
+JSR-205=0;font_small=15;png=1;jpg=1;jsr184_dithering=0;CLEAR/DELETE=-8;JSR-82=0;alpha_channel=32;JSR-135=1;mot-wt=0;JSR-75-pim=0;http=1;pointer_motion_event=0;browser_launch=1;BACK/RETURN=-11;camera=1;free_memory=456248;user_agent=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;heap_size=524284;cldc=CLDC-1.1;canvas_size_y=176;canvas_size_x=176;double_buffered=1;NAVIGATION PRESS=-5;JSR-184=1;JSR-120=1;color=65536;JSR-180=0;JSR-75-file=0;RIGHT SOFT KEY=-7;NAVIGATION RIGHT=-4;KEY *=42;push_socket=0;pointer_event=0;KEY #=35;KEY NUM 9=57;nokia-ui=1;KEY NUM 8=56;KEY NUM 7=55;KEY NUM 6=54;KEY NUM 5=53;java_platform=xxxxxxxxxxxxxxxxx/xxxxxxx;KEY NUM 4=52;gif=1;KEY NUM 3=51;NAVIGATION UP=-1;KEY NUM 2=50;KEY NUM 1=49;midp=MIDP-1.0 MIDP-2.0;font_large=22;KEY NUM 0=48;sie-col-game=0;JSR-179=0;push_sms=1;JSR-172=0;NAVIGATION LEFT=-3;LEFT SOFT KEY=-6;jsr184_antialiasing=0;font_medium=18;fullscreen_canvas_size_y=220;fullscreen_canvas_size_x=176;https=1;NAVIGATION DOWN=-2;java_locale=de;video_encoding=encoding=JPEG&width=176&height=182encoding=JPEG&width=176&height=220;
+0
+
+"
+ },
+ {
+ expect => 1104*1024,
+ comment => "chunked with many chunks",
+ raw => ("POST /cgi-bin/redir-TE.pl HTTP/1.1
+User-Agent: UNTRUSTED/1.0
+Content-Type: application/x-www-form-urlencoded
+Host: localhost:80
+Transfer-Encoding: chunked
+
+".("450
+JSR-205=0;font_small=15;png=1;jpg=1;jsr184_dithering=0;CLEAR/DELETE=-8;JSR-82=0;alpha_channel=32;JSR-135=1;mot-wt=0;JSR-75-pim=0;http=1;pointer_motion_event=0;browser_launch=1;BACK/RETURN=-11;camera=1;free_memory=456248;user_agent=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;heap_size=524284;cldc=CLDC-1.1;canvas_size_y=176;canvas_size_x=176;double_buffered=1;NAVIGATION PRESS=-5;JSR-184=1;JSR-120=1;color=65536;JSR-180=0;JSR-75-file=0;RIGHT SOFT KEY=-7;NAVIGATION RIGHT=-4;KEY *=42;push_socket=0;pointer_event=0;KEY #=35;KEY NUM 9=57;nokia-ui=1;KEY NUM 8=56;KEY NUM 7=55;KEY NUM 6=54;KEY NUM 5=53;java_platform=xxxxxxxxxxxxxxxxx/xxxxxxx;KEY NUM 4=52;gif=1;KEY NUM 3=51;NAVIGATION UP=-1;KEY NUM 2=50;KEY NUM 1=49;midp=MIDP-1.0 MIDP-2.0;font_large=22;KEY NUM 0=48;sie-col-game=0;JSR-179=0;push_sms=1;JSR-172=0;NAVIGATION LEFT=-3;LEFT SOFT KEY=-6;jsr184_antialiasing=0;font_medium=18;fullscreen_canvas_size_y=220;fullscreen_canvas_size_x=176;https=1;NAVIGATION DOWN=-2;java_locale=de;video_encoding=encoding=JPEG&width=176&height=182encoding=JPEG&width=176&height=220;
+"x1024)."0
+
+")
+ },
+ );
+
+
+my $can_fork = $Config{d_fork} ||
+ (($^O eq 'MSWin32' || $^O eq 'NetWare') and
+ $Config{useithreads} and $Config{ccflags} =~ /-DPERL_IMPLICIT_SYS/);
+
+my $tests = @TESTS;
+my $tport = 8333;
+
+my $tsock = IO::Socket::INET->new(LocalAddr => '0.0.0.0',
+ LocalPort => $tport,
+ Listen => 1,
+ ReuseAddr => 1);
+if (!$can_fork) {
+ plan skip_all => "This system cannot fork";
+}
+elsif (!$tsock) {
+ plan skip_all => "Cannot listen on 0.0.0.0:$tport";
+}
+else {
+ close $tsock;
+ plan tests => $tests;
+}
+
+sub mywarn ($) {
+ return unless $LOGGING;
+ my($mess) = @_;
+ open my $fh, ">>", "http-daemon.out"
+ or die $!;
+ my $ts = localtime;
+ print $fh "$ts: $mess\n";
+ close $fh or die $!;
+}
+
+
+my $pid;
+if ($pid = fork) {
+ sleep 4;
+ for my $t (0..$#TESTS) {
+ my $test = $TESTS[$t];
+ my $raw = $test->{raw};
+ $raw =~ s/\r?\n/$CRLF/mg;
+ if (0) {
+ open my $fh, "| socket localhost $tport" or die;
+ print $fh $test;
+ }
+ use IO::Socket::INET;
+ my $sock = IO::Socket::INET->new(
+ PeerAddr => "127.0.0.1",
+ PeerPort => $tport,
+ ) or die;
+ if (0) {
+ for my $pos (0..length($raw)-1) {
+ print $sock substr($raw,$pos,1);
+ sleep 0.001;
+ }
+ } else {
+ print $sock $raw;
+ }
+ local $/;
+ my $resp = <$sock>;
+ close $sock;
+ my($got) = $resp =~ /\r?\n\r?\n(\d+)/s;
+ is($got,
+ $test->{expect},
+ "[$test->{expect}] $test->{comment}",
+ );
+ }
+ wait;
+} else {
+ die "cannot fork: $!" unless defined $pid;
+ my $d = HTTP::Daemon->new(
+ LocalAddr => '0.0.0.0',
+ LocalPort => $tport,
+ ReuseAddr => 1,
+ ) or die;
+ mywarn "Starting new daemon as '$$'";
+ my $i;
+ LISTEN: while (my $c = $d->accept) {
+ my $r = $c->get_request;
+ mywarn sprintf "headers[%s] content[%s]", $r->headers->as_string, $r->content;
+ my $res = HTTP::Response->new(200,undef,undef,length($r->content).$CRLF);
+ $c->send_response($res);
+ $c->force_last_request; # we're just not mature enough
+ $c->close;
+ undef($c);
+ last if ++$i >= $tests;
+ }
+}
+
+
+
+# Local Variables:
+# mode: cperl
+# cperl-indent-level: 2
+# End:
--- /dev/null
+#
+# Test retrieving a file with a 'file://' URL,
+#
+
+if ($^O eq "MacOS") {
+ print "1..0\n";
+ exit;
+}
+
+
+# First locate some suitable tmp-dir. We need an absolute path.
+$TMPDIR = undef;
+for ("/tmp/", "/var/tmp", "/usr/tmp", "/local/tmp") {
+ if (open(TEST, ">$_/test-$$")) {
+ close(TEST);
+ unlink("$_/test-$$");
+ $TMPDIR = $_;
+ last;
+ }
+}
+$TMPDIR ||= $ENV{TEMP} if $^O eq 'MSWin32';
+unless ($TMPDIR) {
+ # Can't run any tests
+ print "1..0\n";
+ print "ok 1\n";
+ exit;
+}
+$TMPDIR =~ tr|\\|/|;
+
+use Test;
+plan tests => 2;
+
+use LWP::Simple;
+require LWP::Protocol::file;
+
+my $orig = "$TMPDIR/lwp-orig-$$"; # local file
+my $copy = "$TMPDIR/lwp-copy-$$"; # downloaded copy
+
+# First we create the original
+open(OUT, ">$orig") or die "Cannot open $orig: $!";
+binmode(OUT);
+for (1..5) {
+ print OUT "This is line $_ of $orig\n";
+}
+close(OUT);
+
+
+# Then we make a test using getprint(), so we need to capture stdout
+open (OUT, ">$copy") or die "Cannot open $copy: $!";
+select(OUT);
+
+# do the retrieval
+getprint("file://localhost" . ($orig =~ m|^/| ? $orig : "/$orig"));
+
+close(OUT);
+select(STDOUT);
+
+# read and compare the files
+open(IN, $orig) or die "Cannot open '$orig': $!";
+undef($/);
+$origtext = <IN>;
+close(IN);
+open(IN, $copy) or die "Cannot open '$copy': $!";
+undef($/);
+$copytext = <IN>;
+close(IN);
+
+unlink($copy);
+
+ok($copytext, $origtext);
+
+
+# Test getstore() function
+
+getstore("file:$orig", $copy);
+
+# Take a look at the new copy
+open(IN, $copy) or die "Cannot open '$copy': $!";
+undef($/);
+$copytext = <IN>;
+close(IN);
+
+unlink($orig);
+unlink($copy);
+
+ok($copytext, $origtext);
--- /dev/null
+if ($^O eq "MacOS") {
+ print "1..0\n";
+ exit(0);
+}
+
+unless (-f "CAN_TALK_TO_OURSELF") {
+ print "1..0 # Skipped: Can't talk to ourself (misconfigured system)\n";
+ exit;
+}
+
+$| = 1; # autoflush
+
+require IO::Socket; # make sure this work before we try to make a HTTP::Daemon
+
+# First we make ourself a daemon in another process
+my $D = shift || '';
+if ($D eq 'daemon') {
+
+ require HTTP::Daemon;
+
+ my $d = HTTP::Daemon->new(Timeout => 10);
+
+ print "Please to meet you at: <URL:", $d->url, ">\n";
+ open(STDOUT, $^O eq 'VMS'? ">nl: " : ">/dev/null");
+
+ while ($c = $d->accept) {
+ $r = $c->get_request;
+ if ($r) {
+ my $p = ($r->uri->path_segments)[1];
+ my $func = lc("httpd_" . $r->method . "_$p");
+ if (defined &$func) {
+ &$func($c, $r);
+ }
+ else {
+ $c->send_error(404);
+ }
+ }
+ $c = undef; # close connection
+ }
+ print STDERR "HTTP Server terminated\n";
+ exit;
+}
+else {
+ use Config;
+ my $perl = $Config{'perlpath'};
+ $perl = $^X if $^O eq 'VMS' or -x $^X and $^X =~ m,^([a-z]:)?/,i;
+ open(DAEMON, "$perl local/http.t daemon |") or die "Can't exec daemon: $!";
+}
+
+use Test;
+plan tests => 54;
+
+my $greeting = <DAEMON>;
+$greeting =~ /(<[^>]+>)/;
+
+require URI;
+my $base = URI->new($1);
+sub url {
+ my $u = URI->new(@_);
+ $u = $u->abs($_[1]) if @_ > 1;
+ $u->as_string;
+}
+
+print "Will access HTTP server at $base\n";
+
+require LWP::UserAgent;
+require HTTP::Request;
+$ua = new LWP::UserAgent;
+$ua->agent("Mozilla/0.01 " . $ua->agent);
+$ua->from('gisle@aas.no');
+
+#----------------------------------------------------------------
+print "Bad request...\n";
+$req = new HTTP::Request GET => url("/not_found", $base);
+$req->header(X_Foo => "Bar");
+$res = $ua->request($req);
+
+ok($res->is_error);
+ok($res->code, 404);
+ok($res->message, qr/not\s+found/i);
+# we also expect a few headers
+ok($res->server);
+ok($res->date);
+
+#----------------------------------------------------------------
+print "Simple echo...\n";
+sub httpd_get_echo
+{
+ my($c, $req) = @_;
+ $c->send_basic_header(200);
+ print $c "Content-Type: message/http\015\012";
+ $c->send_crlf;
+ print $c $req->as_string;
+}
+
+$req = new HTTP::Request GET => url("/echo/path_info?query", $base);
+$req->push_header(Accept => 'text/html');
+$req->push_header(Accept => 'text/plain; q=0.9');
+$req->push_header(Accept => 'image/*');
+$req->push_header(':foo_bar' => 1);
+$req->if_modified_since(time - 300);
+$req->header(Long_text => 'This is a very long header line
+which is broken between
+more than one line.');
+$req->header(X_Foo => "Bar");
+
+$res = $ua->request($req);
+#print $res->as_string;
+
+ok($res->is_success);
+ok($res->code, 200);
+ok($res->message, "OK");
+
+$_ = $res->content;
+@accept = /^Accept:\s*(.*)/mg;
+
+ok($_, qr/^From:\s*gisle\@aas\.no\n/m);
+ok($_, qr/^Host:/m);
+ok(@accept, 3);
+ok($_, qr/^Accept:\s*text\/html/m);
+ok($_, qr/^Accept:\s*text\/plain/m);
+ok($_, qr/^Accept:\s*image\/\*/m);
+ok($_, qr/^If-Modified-Since:\s*\w{3},\s+\d+/m);
+ok($_, qr/^Long-Text:\s*This.*broken between/m);
+ok($_, qr/^Foo-Bar:\s*1\n/m);
+ok($_, qr/^X-Foo:\s*Bar\n/m);
+ok($_, qr/^User-Agent:\s*Mozilla\/0.01/m);
+
+# Try it with the higher level 'get' interface
+$res = $ua->get(url("/echo/path_info?query", $base),
+ Accept => 'text/html',
+ Accept => 'text/plain; q=0.9',
+ Accept => 'image/*',
+ X_Foo => "Bar",
+);
+#$res->dump;
+ok($res->code, 200);
+ok($res->content, qr/^From: gisle\@aas.no$/m);
+
+#----------------------------------------------------------------
+print "Send file...\n";
+
+my $file = "test-$$.html";
+open(FILE, ">$file") or die "Can't create $file: $!";
+binmode FILE or die "Can't binmode $file: $!";
+print FILE <<EOT;
+<html><title>En prøve</title>
+<h1>Dette er en testfil</h1>
+Jeg vet ikke hvor stor fila behøver å være heller, men dette
+er sikkert nok i massevis.
+EOT
+close(FILE);
+
+sub httpd_get_file
+{
+ my($c, $r) = @_;
+ my %form = $r->uri->query_form;
+ my $file = $form{'name'};
+ $c->send_file_response($file);
+ unlink($file) if $file =~ /^test-/;
+}
+
+$req = new HTTP::Request GET => url("/file?name=$file", $base);
+$res = $ua->request($req);
+#print $res->as_string;
+
+ok($res->is_success);
+ok($res->content_type, 'text/html');
+ok($res->content_length, 147);
+ok($res->title, 'En prøve');
+ok($res->content, qr/å være/);
+
+# A second try on the same file, should fail because we unlink it
+$res = $ua->request($req);
+#print $res->as_string;
+ok($res->is_error);
+ok($res->code, 404); # not found
+
+# Then try to list current directory
+$req = new HTTP::Request GET => url("/file?name=.", $base);
+$res = $ua->request($req);
+#print $res->as_string;
+ok($res->code, 501); # NYI
+
+
+#----------------------------------------------------------------
+print "Check redirect...\n";
+sub httpd_get_redirect
+{
+ my($c) = @_;
+ $c->send_redirect("/echo/redirect");
+}
+
+$req = new HTTP::Request GET => url("/redirect/foo", $base);
+$res = $ua->request($req);
+#print $res->as_string;
+
+ok($res->is_success);
+ok($res->content, qr|/echo/redirect|);
+ok($res->previous->is_redirect);
+ok($res->previous->code, 301);
+
+# Let's test a redirect loop too
+sub httpd_get_redirect2 { shift->send_redirect("/redirect3/") }
+sub httpd_get_redirect3 { shift->send_redirect("/redirect2/") }
+
+$req->uri(url("/redirect2", $base));
+$ua->max_redirect(5);
+$res = $ua->request($req);
+#print $res->as_string;
+ok($res->is_redirect);
+ok($res->header("Client-Warning"), qr/loop detected/i);
+ok($res->redirects, 5);
+
+$ua->max_redirect(0);
+$res = $ua->request($req);
+ok($res->previous, undef);
+ok($res->redirects, 0);
+$ua->max_redirect(5);
+
+#----------------------------------------------------------------
+print "Check basic authorization...\n";
+sub httpd_get_basic
+{
+ my($c, $r) = @_;
+ #print STDERR $r->as_string;
+ my($u,$p) = $r->authorization_basic;
+ if (defined($u) && $u eq 'ok 12' && $p eq 'xyzzy') {
+ $c->send_basic_header(200);
+ print $c "Content-Type: text/plain";
+ $c->send_crlf;
+ $c->send_crlf;
+ $c->print("$u\n");
+ }
+ else {
+ $c->send_basic_header(401);
+ $c->print("WWW-Authenticate: Basic realm=\"libwww-perl\"\015\012");
+ $c->send_crlf;
+ }
+}
+
+{
+ package MyUA; @ISA=qw(LWP::UserAgent);
+ sub get_basic_credentials {
+ my($self, $realm, $uri, $proxy) = @_;
+ if ($realm eq "libwww-perl" && $uri->rel($base) eq "basic") {
+ return ("ok 12", "xyzzy");
+ }
+ else {
+ return undef;
+ }
+ }
+}
+$req = new HTTP::Request GET => url("/basic", $base);
+$res = MyUA->new->request($req);
+#print $res->as_string;
+
+ok($res->is_success);
+#print $res->content;
+
+# Let's try with a $ua that does not pass out credentials
+$res = $ua->request($req);
+ok($res->code, 401);
+
+# Let's try to set credentials for this realm
+$ua->credentials($req->uri->host_port, "libwww-perl", "ok 12", "xyzzy");
+$res = $ua->request($req);
+ok($res->is_success);
+
+# Then illegal credentials
+$ua->credentials($req->uri->host_port, "libwww-perl", "user", "passwd");
+$res = $ua->request($req);
+ok($res->code, 401);
+
+
+#----------------------------------------------------------------
+print "Check proxy...\n";
+sub httpd_get_proxy
+{
+ my($c,$r) = @_;
+ if ($r->method eq "GET" and
+ $r->uri->scheme eq "ftp") {
+ $c->send_basic_header(200);
+ $c->send_crlf;
+ }
+ else {
+ $c->send_error;
+ }
+}
+
+$ua->proxy(ftp => $base);
+$req = new HTTP::Request GET => "ftp://ftp.perl.com/proxy";
+$res = $ua->request($req);
+#print $res->as_string;
+ok($res->is_success);
+
+#----------------------------------------------------------------
+print "Check POSTing...\n";
+sub httpd_post_echo
+{
+ my($c,$r) = @_;
+ $c->send_basic_header;
+ $c->print("Content-Type: text/plain");
+ $c->send_crlf;
+ $c->send_crlf;
+
+ # Do it the hard way to test the send_file
+ open(TMP, ">tmp$$") || die;
+ binmode(TMP);
+ print TMP $r->as_string;
+ close(TMP) || die;
+
+ $c->send_file("tmp$$");
+
+ unlink("tmp$$");
+}
+
+$req = new HTTP::Request POST => url("/echo/foo", $base);
+$req->content_type("application/x-www-form-urlencoded");
+$req->content("foo=bar&bar=test");
+$res = $ua->request($req);
+#print $res->as_string;
+
+$_ = $res->content;
+ok($res->is_success);
+ok($_, qr/^Content-Length:\s*16$/mi);
+ok($_, qr/^Content-Type:\s*application\/x-www-form-urlencoded$/mi);
+ok($_, qr/^foo=bar&bar=test$/m);
+
+$req = HTTP::Request->new(POST => url("/echo/foo", $base));
+$req->content_type("multipart/form-data");
+$req->add_part(HTTP::Message->new(["Content-Type" => "text/plain"], "Hi\n"));
+$req->add_part(HTTP::Message->new(["Content-Type" => "text/plain"], "there\n"));
+$res = $ua->request($req);
+#print $res->as_string;
+ok($res->is_success);
+ok($res->content =~ /^Content-Type: multipart\/form-data; boundary=/m);
+
+#----------------------------------------------------------------
+print "Check partial content response...\n";
+sub httpd_get_partial
+{
+ my($c) = @_;
+ $c->send_basic_header(206);
+ print $c "Content-Type: image/jpeg\015\012";
+ $c->send_crlf;
+ print $c "some fake JPEG content";
+
+}
+
+{
+ $req = HTTP::Request->new( GET => url("/partial", $base) );
+ $res = $ua->request($req);
+ ok($res->is_success); # "a 206 response is considered successful"
+}
+{
+ $ua->max_size(3);
+ $req = HTTP::Request->new( GET => url("/partial", $base) );
+ $res = $ua->request($req);
+ ok($res->is_success); # "a 206 response is considered successful"
+ # Put max_size back how we found it.
+ $ua->max_size(undef);
+ ok($res->as_string, qr/Client-Aborted: max_size/); # Client-Aborted is returned when max_size is given
+}
+
+
+#----------------------------------------------------------------
+print "Terminating server...\n";
+sub httpd_get_quit
+{
+ my($c) = @_;
+ $c->send_error(503, "Bye, bye");
+ exit; # terminate HTTP server
+}
+
+$req = new HTTP::Request GET => url("/quit", $base);
+$res = $ua->request($req);
+
+ok($res->code, 503);
+ok($res->content, qr/Bye, bye/);
--- /dev/null
+#!perl
+
+use strict;
+use Test;
+plan tests => 6;
+
+# This test tries to make a custom protocol implementation by
+# subclassing of LWP::Protocol.
+
+
+use LWP::UserAgent ();
+use LWP::Protocol ();
+
+LWP::Protocol::implementor(http => 'myhttp');
+
+my $ua = LWP::UserAgent->new;
+$ua->proxy('ftp' => "http://www.sn.no/");
+
+my $req = HTTP::Request->new(GET => 'ftp://foo/');
+$req->header(Cookie => "perl=cool");
+
+my $res = $ua->request($req);
+
+#print $res->as_string;
+ok($res->code, 200);
+ok($res->content, "Howdy\n");
+exit;
+
+
+#----------------------------------
+package myhttp;
+
+use Test qw(ok);
+
+BEGIN {
+ use vars qw(@ISA);
+ @ISA=qw(LWP::Protocol);
+}
+
+sub new
+{
+ my $class = shift;
+ print "CTOR: $class->new(@_)\n";
+ my($prot) = @_;
+ ok($prot, "http");
+ my $self = $class->SUPER::new(@_);
+ for (keys %$self) {
+ my $v = $self->{$_};
+ $v = "<undef>" unless defined($v);
+ print "$_: $v\n";
+ }
+ $self;
+}
+
+sub request
+{
+ my $self = shift;
+ my($request, $proxy, $arg, $size, $timeout) = @_;
+ #print $request->as_string;
+
+ ok($proxy, "http://www.sn.no/");
+ ok($request->uri, "ftp://foo/");
+ ok($request->header("cookie"), "perl=cool");
+
+ my $res = HTTP::Response->new(200 => "OK");
+ $res->content_type("text/plain");
+ $res->date(time);
+ $self->collect_once($arg, $res, "Howdy\n");
+ $res;
+}
--- /dev/null
+#!/bin/sh
+
+echo "Location: http://$SERVER_NAME:$SERVER_PORT/"
+echo
--- /dev/null
+#!/usr/local/bin/perl
+
+# This script outputs some data slowly. It can be used to check that
+# pipelined processing of response content really works. We use syswrite
+# so that the script will notice when the connection is broken.
+
+out("HTTP/1.0 200 OK\r
+Content-Type: text/plain\r
+\r
+");
+
+for (1..5) {
+ out("The number is now $_\n");
+ sleep(1);
+}
+exit;
+
+
+sub out
+{
+ my $data = shift;
+ my $l = length $data;
+ if (syswrite(STDOUT, $data, $l) != $l) {
+ exit 1;
+ }
+}
--- /dev/null
+#!/usr/local/bin/perl
+
+# You might post large amount of data to this script. It will read
+# it slowly.
+
+{ local($!) = 1; print "Content-Type: text/plain\n\n"; }
+
+$len = $ENV{CONTENT_LENGTH};
+
+unless ($len) {
+ system "env";
+ exit;
+}
+
+$size = 20; # chunk size
+
+$content = '';
+$bytes = 0;
+
+sleep(1);
+while ($len > 0) {
+ $n = sysread(STDIN, $b, $size);
+ last if $n <= 0;
+ $len -= $n;
+ $bytes += $n;
+ $content .= $b;
+ sleep(1);
+}
+print "$bytes bytes read\n";
+
+
--- /dev/null
+#!/usr/local/bin/perl
+
+$| = 1;
+print "Content-type: text/plain
+
+";
+
+if (@ARGV) {
+ print "ARGS: ";
+ print join(", ", map { $_ = qq{"$_"} } @ARGV);
+ print "\n\n";
+} else {
+ print "No command line arguments passed to script\n\n";
+}
+
+while (($key,$val) = each %ENV) {
+ print "$key=$val\n";
+}
+
+if ($ENV{CONTENT_LENGTH}) {
+ $len = $ENV{CONTENT_LENGTH};
+ while ($len) {
+ $n = sysread(STDIN, $content, $len, length $content);
+ last unless defined $n;
+ $len -= $n;
+ }
+ print "\nContent\n-------\n$content";
+}
--- /dev/null
+#!/bin/sh
+
+sleep 20;
+
+echo "Content-type: text/plain"
+echo
+echo "Test"
--- /dev/null
+package net;
+
+# Configure these for your local system
+$httpserver = "localhost:80";
+$cgidir = "/cgi-bin/lwp";
+
+# Used for proxy test
+$ftp_proxy = "http://localhost/";
+
+1;
--- /dev/null
+#!/usr/local/bin/perl -w
+#
+# Check GET via HTTP.
+#
+
+print "1..2\n";
+
+require "net/config.pl";
+require HTTP::Request;
+require LWP::UserAgent;
+
+my $ua = new LWP::UserAgent; # create a useragent to test
+
+$netloc = $net::httpserver;
+$script = $net::cgidir . "/test";
+
+$url = "http://$netloc$script?query";
+
+my $request = new HTTP::Request('GET', $url);
+
+print "GET $url\n\n";
+
+my $response = $ua->request($request, undef, undef);
+
+my $str = $response->as_string;
+
+print "$str\n";
+
+if ($response->is_success and $str =~ /^REQUEST_METHOD=GET$/m) {
+ print "ok 1\n";
+}
+else {
+ print "not ok 1\n";
+}
+
+if ($str =~ /^QUERY_STRING=query$/m) {
+ print "ok 2\n";
+}
+else {
+ print "not ok 2\n";
+}
+
+# avoid -w warning
+$dummy = $net::httpserver;
+$dummy = $net::cgidir;
--- /dev/null
+#!/usr/local/bin/perl -w
+#
+# Check POST via HTTP.
+#
+
+print "1..2\n";
+
+require "net/config.pl";
+require HTTP::Request;
+require LWP::UserAgent;
+
+$netloc = $net::httpserver;
+$script = $net::cgidir . "/test";
+
+my $ua = new LWP::UserAgent; # create a useragent to test
+
+$url = "http://$netloc$script";
+
+my $form = 'searchtype=Substring';
+
+my $request = new HTTP::Request('POST', $url, undef, $form);
+$request->header('Content-Type', 'application/x-www-form-urlencoded');
+
+my $response = $ua->request($request, undef, undef);
+
+my $str = $response->as_string;
+
+print "$str\n";
+
+if ($response->is_success and $str =~ /^REQUEST_METHOD=POST$/m) {
+ print "ok 1\n";
+}
+else {
+ print "not ok 1\n";
+}
+
+if ($str =~ /^CONTENT_LENGTH=(\d+)$/m && $1 == length($form)) {
+ print "ok 2\n";
+}
+else {
+ print "not ok 2\n";
+}
+
+# avoid -w warning
+$dummy = $net::httpserver;
+$dummy = $net::cgidir;
--- /dev/null
+#
+# Check timeouts via HTTP.
+#
+
+print "1..1\n";
+
+require "net/config.pl";
+require HTTP::Request;
+require LWP::UserAgent;
+
+my $ua = new LWP::UserAgent; # create a useragent to test
+
+$ua->timeout(4);
+
+$netloc = $net::httpserver;
+$script = $net::cgidir . "/timeout";
+
+$url = "http://$netloc$script";
+
+my $request = new HTTP::Request('GET', $url);
+
+print $request->as_string;
+
+my $response = $ua->request($request, undef);
+
+my $str = $response->as_string;
+
+print "$str\n";
+
+if ($response->is_error and
+ $str =~ /timeout/) {
+ print "ok 1\n";
+}
+else {
+ print "nok ok 1\n";
+}
+
+# avoid -w warning
+$dummy = $net::httpserver;
+$dummy = $net::cgidir;
--- /dev/null
+#
+# Test mirroring a file
+#
+
+require "net/config.pl";
+require LWP::UserAgent;
+require HTTP::Status;
+
+print "1..2\n";
+
+my $ua = new LWP::UserAgent; # create a useragent to test
+
+my $url = "http://$net::httpserver/";
+my $copy = "lwp-test-$$"; # downloaded copy
+
+my $response = $ua->mirror($url, $copy);
+
+if ($response->code == &HTTP::Status::RC_OK) {
+ print "ok 1\n";
+}
+else {
+ print "not ok 1\n";
+}
+
+# OK, so now do it again, should get Not-Modified
+$response = $ua->mirror($url, $copy);
+if ($response->code == &HTTP::Status::RC_NOT_MODIFIED) {
+ print "ok 2\n";
+}
+else {
+ print "not ok 2\n";
+}
+unlink($copy);
+
+$net::httpserver = $net::httpserver; # avoid -w warning
--- /dev/null
+#!/usr/local/bin/perl -w
+#
+
+print "1..1\n";
+
+require "net/config.pl";
+require LWP::UserAgent;
+
+$url = "http://$net::httpserver$net::cgidir/moved";
+
+my $ua = new LWP::UserAgent; # create a useragent to test
+$ua->timeout(30); # timeout in seconds
+
+my $request = new HTTP::Request('GET', $url);
+
+print $request->as_string;
+
+my $response = $ua->request($request, undef, undef);
+
+print $response->as_string, "\n";
+
+if ($response->is_success) {
+ print "ok 1\n";
+}
+else {
+ print "not ok 1\n";
+}
+
+
+# avoid -w warning
+$dummy = $net::httpserver;
+$dummy = $net::cgidir;
--- /dev/null
+#!/usr/local/bin/perl -w
+#
+# Test retrieving a file with a 'ftp://' URL,
+# via a HTTP proxy.
+#
+
+print "1..1\n";
+
+require "net/config.pl";
+unless (defined $net::ftp_proxy) {
+ print "not ok 1\n";
+ exit 0;
+}
+
+require HTTP::Request;
+require LWP::UserAgent;
+
+my $ua = new LWP::UserAgent; # create a useragent to test
+
+$ua->proxy('ftp', $net::ftp_proxy);
+
+my $url = 'ftp://ftp.uninett.no/';
+
+my $request = new HTTP::Request('GET', $url);
+
+my $response = $ua->request($request, undef, undef);
+
+my $str = $response->as_string;
+
+if ($response->is_success) {
+ print "ok 1\n";
+}
+else {
+ print "not ok 1\n";
+}
--- /dev/null
+
+print "1..13\n";
+
+
+use WWW::RobotRules::AnyDBM_File;
+
+$file = "test-$$";
+
+$r = new WWW::RobotRules::AnyDBM_File "myrobot/2.0", $file;
+
+$r->parse("http://www.aas.no/robots.txt", "");
+
+$r->visit("www.aas.no:80");
+
+print "not " if $r->no_visits("www.aas.no:80") != 1;
+print "ok 1\n";
+
+
+$r->push_rules("www.sn.no:80", "/aas", "/per");
+$r->push_rules("www.sn.no:80", "/god", "/old");
+
+@r = $r->rules("www.sn.no:80");
+print "Rules: @r\n";
+
+print "not " if "@r" ne "/aas /per /god /old";
+print "ok 2\n";
+
+$r->clear_rules("per");
+$r->clear_rules("www.sn.no:80");
+
+@r = $r->rules("www.sn.no:80");
+print "Rules: @r\n";
+
+print "not " if "@r" ne "";
+print "ok 3\n";
+
+$r->visit("www.aas.no:80", time+10);
+$r->visit("www.sn.no:80");
+
+print "No visits: ", $r->no_visits("www.aas.no:80"), "\n";
+print "Last visit: ", $r->last_visit("www.aas.no:80"), "\n";
+print "Fresh until: ", $r->fresh_until("www.aas.no:80"), "\n";
+
+print "not " if $r->no_visits("www.aas.no:80") != 2;
+print "ok 4\n";
+
+print "not " if abs($r->last_visit("www.sn.no:80") - time) > 2;
+print "ok 5\n";
+
+$r = undef;
+
+# Try to reopen the database without a name specified
+$r = new WWW::RobotRules::AnyDBM_File undef, $file;
+$r->visit("www.aas.no:80");
+
+print "not " if $r->no_visits("www.aas.no:80") != 3;
+print "ok 6\n";
+
+print "Agent-Name: ", $r->agent, "\n";
+print "not " if $r->agent ne "myrobot";
+print "ok 7\n";
+
+$r = undef;
+
+print "*** Dump of database ***\n";
+tie(%cat, AnyDBM_File, $file, 0, 0644) or die "Can't tie: $!";
+while (($key,$val) = each(%cat)) {
+ print "$key\t$val\n";
+}
+print "******\n";
+
+untie %cat;
+
+# Try to open database with a different agent name
+$r = new WWW::RobotRules::AnyDBM_File "MOMSpider/2.0", $file;
+
+print "not " if $r->no_visits("www.sn.no:80");
+print "ok 8\n";
+
+# Try parsing
+$r->parse("http://www.sn.no:8080/robots.txt", <<EOT, (time + 3));
+
+User-Agent: *
+Disallow: /
+
+User-Agent: Momspider
+Disallow: /foo
+Disallow: /bar
+
+EOT
+
+@r = $r->rules("www.sn.no:8080");
+print "not " if "@r" ne "/foo /bar";
+print "ok 9\n";
+
+print "not " if $r->allowed("http://www.sn.no") >= 0;
+print "ok 10\n";
+
+print "not " if $r->allowed("http://www.sn.no:8080/foo/gisle");
+print "ok 11\n";
+
+sleep(5); # wait until file has expired
+print "not " if $r->allowed("http://www.sn.no:8080/foo/gisle") >= 0;
+print "ok 12\n";
+
+
+$r = undef;
+
+print "*** Dump of database ***\n";
+tie(%cat, AnyDBM_File, $file, 0, 0644) or die "Can't tie: $!";
+while (($key,$val) = each(%cat)) {
+ print "$key\t$val\n";
+}
+print "******\n";
+
+untie %cat; # Otherwise the next line fails on DOSish
+
+while (unlink("$file", "$file.pag", "$file.dir", "$file.db")) {}
+
+# Try open a an emty database without specifying a name
+eval {
+ $r = new WWW::RobotRules::AnyDBM_File undef, $file;
+};
+print $@;
+print "not " unless $@; # should fail
+print "ok 13\n";
+
+unlink "$file", "$file.pag", "$file.dir", "$file.db";
--- /dev/null
+#!/local/bin/perl
+
+=head1 NAME
+
+robot-rules.t
+
+=head1 DESCRIPTION
+
+Test a number of different A</robots.txt> files against a number
+of different User-agents.
+
+=cut
+
+require WWW::RobotRules;
+use Carp;
+use strict;
+
+print "1..50\n"; # for Test::Harness
+
+# We test a number of different /robots.txt files,
+#
+
+my $content1 = <<EOM;
+# http://foo/robots.txt
+User-agent: *
+Disallow: /private
+Disallow: http://foo/also_private
+
+User-agent: MOMspider
+Disallow:
+EOM
+
+my $content2 = <<EOM;
+# http://foo/robots.txt
+User-agent: MOMspider
+ # comment which should be ignored
+Disallow: /private
+EOM
+
+my $content3 = <<EOM;
+# http://foo/robots.txt
+EOM
+
+my $content4 = <<EOM;
+# http://foo/robots.txt
+User-agent: *
+Disallow: /private
+Disallow: mailto:foo
+
+User-agent: MOMspider
+Disallow: /this
+
+User-agent: Another
+Disallow: /that
+
+
+User-agent: SvartEnke1
+Disallow: http://fOO
+Disallow: http://bar
+
+User-Agent: SvartEnke2
+Disallow: ftp://foo
+Disallow: http://foo:8080/
+Disallow: http://bar/
+
+Sitemap: http://www.adobe.com/sitemap.xml
+EOM
+
+my $content5 = <<EOM;
+# I've locked myself away
+User-agent: *
+Disallow: /
+# The castle is your home now, so you can go anywhere you like.
+User-agent: Belle
+Disallow: /west-wing/ # except the west wing!
+# It's good to be the Prince...
+User-agent: Beast
+Disallow:
+EOM
+
+# same thing backwards
+my $content6 = <<EOM;
+# It's good to be the Prince...
+User-agent: Beast
+Disallow:
+# The castle is your home now, so you can go anywhere you like.
+User-agent: Belle
+Disallow: /west-wing/ # except the west wing!
+# I've locked myself away
+User-agent: *
+Disallow: /
+EOM
+
+# and a number of different robots:
+
+my @tests1 = (
+ [$content1, 'MOMspider' =>
+ 1 => 'http://foo/private' => 1,
+ 2 => 'http://foo/also_private' => 1,
+ ],
+
+ [$content1, 'Wubble' =>
+ 3 => 'http://foo/private' => 0,
+ 4 => 'http://foo/also_private' => 0,
+ 5 => 'http://foo/other' => 1,
+ ],
+
+ [$content2, 'MOMspider' =>
+ 6 => 'http://foo/private' => 0,
+ 7 => 'http://foo/other' => 1,
+ ],
+
+ [$content2, 'Wubble' =>
+ 8 => 'http://foo/private' => 1,
+ 9 => 'http://foo/also_private' => 1,
+ 10 => 'http://foo/other' => 1,
+ ],
+
+ [$content3, 'MOMspider' =>
+ 11 => 'http://foo/private' => 1,
+ 12 => 'http://foo/other' => 1,
+ ],
+
+ [$content3, 'Wubble' =>
+ 13 => 'http://foo/private' => 1,
+ 14 => 'http://foo/other' => 1,
+ ],
+
+ [$content4, 'MOMspider' =>
+ 15 => 'http://foo/private' => 1,
+ 16 => 'http://foo/this' => 0,
+ 17 => 'http://foo/that' => 1,
+ ],
+
+ [$content4, 'Another' =>
+ 18 => 'http://foo/private' => 1,
+ 19 => 'http://foo/this' => 1,
+ 20 => 'http://foo/that' => 0,
+ ],
+
+ [$content4, 'Wubble' =>
+ 21 => 'http://foo/private' => 0,
+ 22 => 'http://foo/this' => 1,
+ 23 => 'http://foo/that' => 1,
+ ],
+
+ [$content4, 'Another/1.0' =>
+ 24 => 'http://foo/private' => 1,
+ 25 => 'http://foo/this' => 1,
+ 26 => 'http://foo/that' => 0,
+ ],
+
+ [$content4, "SvartEnke1" =>
+ 27 => "http://foo/" => 0,
+ 28 => "http://foo/this" => 0,
+ 29 => "http://bar/" => 1,
+ ],
+
+ [$content4, "SvartEnke2" =>
+ 30 => "http://foo/" => 1,
+ 31 => "http://foo/this" => 1,
+ 32 => "http://bar/" => 1,
+ ],
+
+ [$content4, "MomSpiderJr" => # should match "MomSpider"
+ 33 => 'http://foo/private' => 1,
+ 34 => 'http://foo/also_private' => 1,
+ 35 => 'http://foo/this/' => 0,
+ ],
+
+ [$content4, "SvartEnk" => # should match "*"
+ 36 => "http://foo/" => 1,
+ 37 => "http://foo/private/" => 0,
+ 38 => "http://bar/" => 1,
+ ],
+
+ [$content5, 'Villager/1.0' =>
+ 39 => 'http://foo/west-wing/' => 0,
+ 40 => 'http://foo/' => 0,
+ ],
+
+ [$content5, 'Belle/2.0' =>
+ 41 => 'http://foo/west-wing/' => 0,
+ 42 => 'http://foo/' => 1,
+ ],
+
+ [$content5, 'Beast/3.0' =>
+ 43 => 'http://foo/west-wing/' => 1,
+ 44 => 'http://foo/' => 1,
+ ],
+
+ [$content6, 'Villager/1.0' =>
+ 45 => 'http://foo/west-wing/' => 0,
+ 46 => 'http://foo/' => 0,
+ ],
+
+ [$content6, 'Belle/2.0' =>
+ 47 => 'http://foo/west-wing/' => 0,
+ 48 => 'http://foo/' => 1,
+ ],
+
+ [$content6, 'Beast/3.0' =>
+ 49 => 'http://foo/west-wing/' => 1,
+ 50 => 'http://foo/' => 1,
+ ],
+
+ # when adding tests, remember to increase
+ # the maximum at the top
+
+ );
+
+my $t;
+
+for $t (@tests1) {
+ my ($content, $ua) = splice(@$t, 0, 2);
+
+ my $robotsrules = new WWW::RobotRules($ua);
+ $robotsrules->parse('http://foo/robots.txt', $content);
+
+ my ($num, $path, $expected);
+ while(($num, $path, $expected) = splice(@$t, 0, 3)) {
+ my $allowed = $robotsrules->allowed($path);
+ $allowed = 1 if $allowed;
+ if($allowed != $expected) {
+ $robotsrules->dump;
+ confess "Test Failed: $ua => $path ($allowed != $expected)";
+ }
+ print "ok $num\n";
+ }
+}
--- /dev/null
+if($^O eq "MacOS") {
+ print "1..0\n";
+ exit(0);
+}
+
+unless (-f "CAN_TALK_TO_OURSELF") {
+ print "1..0 # Skipped: Can't talk to ourself (misconfigured system)\n";
+ exit;
+}
+
+$| = 1; # autoflush
+require IO::Socket; # make sure this work before we try to make a HTTP::Daemon
+
+# First we make ourself a daemon in another process
+my $D = shift || '';
+if ($D eq 'daemon') {
+
+ require HTTP::Daemon;
+
+ my $d = new HTTP::Daemon Timeout => 10;
+
+ print "Please to meet you at: <URL:", $d->url, ">\n";
+ open(STDOUT, $^O eq 'MSWin32' ? ">nul" : $^O eq 'VMS' ? ">NL:" : ">/dev/null");
+
+ while ($c = $d->accept) {
+ $r = $c->get_request;
+ if ($r) {
+ my $p = ($r->uri->path_segments)[1];
+ $p =~ s/\W//g;
+ my $func = lc("httpd_" . $r->method . "_$p");
+ #print STDERR "Calling $func...\n";
+ if (defined &$func) {
+ &$func($c, $r);
+ }
+ else {
+ $c->send_error(404);
+ }
+ }
+ $c = undef; # close connection
+ }
+ print STDERR "HTTP Server terminated\n";
+ exit;
+}
+else {
+ use Config;
+ my $perl = $Config{'perlpath'};
+ $perl = $^X if $^O eq 'VMS' or -x $^X and $^X =~ m,^([a-z]:)?/,i;
+ open(DAEMON , "$perl robot/ua.t daemon |") or die "Can't exec daemon: $!";
+}
+
+print "1..8\n";
+
+
+$greating = <DAEMON>;
+$greating =~ /(<[^>]+>)/;
+
+require URI;
+my $base = URI->new($1);
+sub url {
+ my $u = URI->new(@_);
+ $u = $u->abs($_[1]) if @_ > 1;
+ $u->as_string;
+}
+
+print "Will access HTTP server at $base\n";
+
+require LWP::RobotUA;
+require HTTP::Request;
+$ua = new LWP::RobotUA 'lwp-spider/0.1', 'gisle@aas.no';
+$ua->delay(0.05); # rather quick robot
+
+#----------------------------------------------------------------
+sub httpd_get_robotstxt
+{
+ my($c,$r) = @_;
+ $c->send_basic_header;
+ $c->print("Content-Type: text/plain");
+ $c->send_crlf;
+ $c->send_crlf;
+ $c->print("User-Agent: *
+Disallow: /private
+
+");
+}
+
+sub httpd_get_someplace
+{
+ my($c,$r) = @_;
+ $c->send_basic_header;
+ $c->print("Content-Type: text/plain");
+ $c->send_crlf;
+ $c->send_crlf;
+ $c->print("Okidok\n");
+}
+
+$res = $ua->get( url("/someplace", $base) );
+#print $res->as_string;
+print "not " unless $res->is_success;
+print "ok 1\n";
+
+$res = $ua->get( url("/private/place", $base) );
+#print $res->as_string;
+print "not " unless $res->code == 403
+ and $res->message =~ /robots.txt/;
+print "ok 2\n";
+
+
+$res = $ua->get( url("/foo", $base) );
+#print $res->as_string;
+print "not " unless $res->code == 404; # not found
+print "ok 3\n";
+
+# Let the robotua generate "Service unavailable/Retry After response";
+$ua->delay(1);
+$ua->use_sleep(0);
+
+$res = $ua->get( url("/foo", $base) );
+#print $res->as_string;
+print "not " unless $res->code == 503 # Unavailable
+ and $res->header("Retry-After");
+print "ok 4\n";
+
+#----------------------------------------------------------------
+print "Terminating server...\n";
+sub httpd_get_quit
+{
+ my($c) = @_;
+ $c->send_error(503, "Bye, bye");
+ exit; # terminate HTTP server
+}
+
+$ua->delay(0);
+
+$res = $ua->get( url("/quit", $base) );
+
+print "not " unless $res->code == 503 and $res->content =~ /Bye, bye/;
+print "ok 5\n";
+
+#---------------------------------------------------------------
+$ua->delay(1);
+
+# host_wait() should be around 60s now
+print "not " unless abs($ua->host_wait($base->host_port) - 60) < 5;
+print "ok 6\n";
+
+# Number of visits to this place should be
+print "not " unless $ua->no_visits($base->host_port) == 4;
+print "ok 7\n";
+
+# RobotUA used to have problem with mailto URLs.
+$ENV{SENDMAIL} = "dummy";
+$res = $ua->get("mailto:gisle\@aas.no");
+#print $res->as_string;
+
+print "not " unless $res->code == 400 && $res->message eq "Library does not allow method GET for 'mailto:' URLs";
+print "ok 8\n";
--- /dev/null
+if($^O eq "MacOS") {
+ print "1..0\n";
+ exit(0);
+}
+
+unless (-f "CAN_TALK_TO_OURSELF") {
+ print "1..0 # Skipped: Can't talk to ourself (misconfigured system)\n";
+ exit;
+}
+
+$| = 1; # autoflush
+require IO::Socket; # make sure this work before we try to make a HTTP::Daemon
+
+# First we make ourself a daemon in another process
+my $D = shift || '';
+if ($D eq 'daemon') {
+
+ require HTTP::Daemon;
+
+ my $d = new HTTP::Daemon Timeout => 10;
+
+ print "Please to meet you at: <URL:", $d->url, ">\n";
+ open(STDOUT, $^O eq 'MSWin32' ? ">nul" : $^O eq 'VMS' ? ">NL:" : ">/dev/null");
+
+ while ($c = $d->accept) {
+ $r = $c->get_request;
+ if ($r) {
+ my $p = ($r->uri->path_segments)[1];
+ $p =~ s/\W//g;
+ my $func = lc("httpd_" . $r->method . "_$p");
+ #print STDERR "Calling $func...\n";
+ if (defined &$func) {
+ &$func($c, $r);
+ }
+ else {
+ $c->send_error(404);
+ }
+ }
+ $c = undef; # close connection
+ }
+ print STDERR "HTTP Server terminated\n";
+ exit;
+}
+else {
+ use Config;
+ my $perl = $Config{'perlpath'};
+ $perl = $^X if $^O eq 'VMS' or -x $^X and $^X =~ m,^([a-z]:)?/,i;
+ open(DAEMON , "$perl robot/ua.t daemon |") or die "Can't exec daemon: $!";
+}
+
+print "1..7\n";
+
+
+$greating = <DAEMON>;
+$greating =~ /(<[^>]+>)/;
+
+require URI;
+my $base = URI->new($1);
+sub url {
+ my $u = URI->new(@_);
+ $u = $u->abs($_[1]) if @_ > 1;
+ $u->as_string;
+}
+
+print "Will access HTTP server at $base\n";
+
+require LWP::RobotUA;
+require HTTP::Request;
+$ua = new LWP::RobotUA 'lwp-spider/0.1', 'gisle@aas.no';
+$ua->delay(0.05); # rather quick robot
+
+#----------------------------------------------------------------
+sub httpd_get_robotstxt
+{
+ my($c,$r) = @_;
+ $c->send_basic_header;
+ $c->print("Content-Type: text/plain");
+ $c->send_crlf;
+ $c->send_crlf;
+ $c->print("User-Agent: *
+Disallow: /private
+
+");
+}
+
+sub httpd_get_someplace
+{
+ my($c,$r) = @_;
+ $c->send_basic_header;
+ $c->print("Content-Type: text/plain");
+ $c->send_crlf;
+ $c->send_crlf;
+ $c->print("Okidok\n");
+}
+
+$req = new HTTP::Request GET => url("/someplace", $base);
+$res = $ua->request($req);
+#print $res->as_string;
+print "not " unless $res->is_success;
+print "ok 1\n";
+
+$req = new HTTP::Request GET => url("/private/place", $base);
+$res = $ua->request($req);
+#print $res->as_string;
+print "not " unless $res->code == 403
+ and $res->message =~ /robots.txt/;
+print "ok 2\n";
+
+$req = new HTTP::Request GET => url("/foo", $base);
+$res = $ua->request($req);
+#print $res->as_string;
+print "not " unless $res->code == 404; # not found
+print "ok 3\n";
+
+# Let the robotua generate "Service unavailable/Retry After response";
+$ua->delay(1);
+$ua->use_sleep(0);
+$req = new HTTP::Request GET => url("/foo", $base);
+$res = $ua->request($req);
+#print $res->as_string;
+print "not " unless $res->code == 503 # Unavailable
+ and $res->header("Retry-After");
+print "ok 4\n";
+
+#----------------------------------------------------------------
+print "Terminating server...\n";
+sub httpd_get_quit
+{
+ my($c) = @_;
+ $c->send_error(503, "Bye, bye");
+ exit; # terminate HTTP server
+}
+
+$ua->delay(0);
+$req = new HTTP::Request GET => url("/quit", $base);
+$res = $ua->request($req);
+
+print "not " unless $res->code == 503 and $res->content =~ /Bye, bye/;
+print "ok 5\n";
+
+#---------------------------------------------------------------
+$ua->delay(1);
+
+# host_wait() should be around 60s now
+print "not " unless abs($ua->host_wait($base->host_port) - 60) < 5;
+print "ok 6\n";
+
+# Number of visits to this place should be
+print "not " unless $ua->no_visits($base->host_port) == 4;
+print "ok 7\n";
+
--- /dev/null
+#!perl -w
+
+# This program check if we are able to talk to ourself. Misconfigured
+# systems that can't talk to their own 'hostname' was the most commonly
+# reported libwww-failure.
+
+use strict;
+require IO::Socket;
+
+if (@ARGV >= 2 && $ARGV[0] eq "--port") {
+ my $port = $ARGV[1];
+ require Sys::Hostname;
+ my $host = Sys::Hostname::hostname();
+ if (my $socket = IO::Socket::INET->new(PeerAddr => "$host:$port", Timeout => 5)) {
+ require IO::Select;
+ if (IO::Select->new($socket)->can_read(1)) {
+ my($n, $buf);
+ if ($n = sysread($socket, $buf, 512)) {
+ exit if $buf eq "Hi there!\n";
+ die "Seems to be talking to the wrong server at $host:$port, got \"$buf\"\n";
+ }
+ elsif (defined $n) {
+ die "Immediate EOF from server at $host:$port\n";
+ }
+ else {
+ die "Can't read from server at $host:$port: $!";
+ }
+ }
+ die "No response from server at $host:$port\n";
+ }
+ die "Can't connect: $@\n";
+}
+
+# server code
+my $socket = IO::Socket::INET->new(Listen => 1, Timeout => 5);
+my $port = $socket->sockport;
+open(CLIENT, qq("$^X" "$0" --port $port |)) || die "Can't run $^X $0: $!\n";
+
+if (my $client = $socket->accept) {
+ print $client "Hi there!\n";
+ close($client) || die "Can't close socket: $!";
+}
+else {
+ warn "Test server timeout\n";
+}
+
+exit if close(CLIENT);
+die "Can't wait for client: $!" if $!;
+die "The can-we-talk-to-ourself test failed.\n";