From d4b3706fe134a5650e0a537022f434d921851fff Mon Sep 17 00:00:00 2001 From: Rafael Garcia-Suarez Date: Fri, 26 Jan 2007 13:56:29 +0000 Subject: [PATCH] Upgrade to File::Fetch 0.10, by Jos Boumans p4raw-id: //depot/perl@30008 --- MANIFEST | 1 - lib/File/Fetch.pm | 299 +++++++++++++++++++++++++++++---------- lib/File/Fetch/Item.pm | 52 ------- lib/File/Fetch/t/01_File-Fetch.t | 65 +++++---- 4 files changed, 265 insertions(+), 152 deletions(-) delete mode 100644 lib/File/Fetch/Item.pm diff --git a/MANIFEST b/MANIFEST index 7cf0743..0ad36c9 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1721,7 +1721,6 @@ lib/File/Copy.pm Emulation of cp command lib/File/Copy.t See if File::Copy works lib/File/DosGlob.pm Win32 DOS-globbing module lib/File/DosGlob.t See if File::DosGlob works -lib/File/Fetch/Item.pm File::Fetch lib/File/Fetch.pm File::Fetch lib/File/Fetch/t/01_File-Fetch.t File::Fetch tests lib/File/Find.pm Routines to do a find diff --git a/lib/File/Fetch.pm b/lib/File/Fetch.pm index 12fce75..59e0873 100644 --- a/lib/File/Fetch.pm +++ b/lib/File/Fetch.pm @@ -5,7 +5,6 @@ use FileHandle; use File::Copy; use File::Spec; use File::Spec::Unix; -use File::Fetch::Item; use File::Basename qw[dirname]; use Cwd qw[cwd]; @@ -21,7 +20,10 @@ use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT $FTP_PASSIVE $TIMEOUT $DEBUG $WARN ]; -$VERSION = 0.08; +use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] }; + + +$VERSION = '0.10'; $PREFER_BIN = 0; # XXX TODO implement $FROM_EMAIL = 'File-Fetch@example.com'; $USER_AGENT = 'File::Fetch/$VERSION'; @@ -86,6 +88,144 @@ C, or C uri by a number of different means. See the C section further down for details. +=head1 ACCESSORS + +A C object has the following accessors + +=over 4 + +=item $ff->uri + +The uri you passed to the constructor + +=item $ff->scheme + +The scheme from the uri (like 'file', 'http', etc) + +=item $ff->host + +The hostname in the uri, will be empty for a 'file' scheme. + +=item $ff->path + +The path from the uri, will be at least a single '/'. + +=item $ff->file + +The name of the remote file. For the local file name, the +result of $ff->output_file will be used. + +=cut + + +########################## +### Object & Accessors ### +########################## + +{ + ### template for new() and autogenerated accessors ### + my $Tmpl = { + scheme => { default => 'http' }, + host => { default => 'localhost' }, + path => { default => '/' }, + file => { required => 1 }, + uri => { required => 1 }, + _error_msg => { no_override => 1 }, + _error_msg_long => { no_override => 1 }, + }; + + for my $method ( keys %$Tmpl ) { + no strict 'refs'; + *$method = sub { + my $self = shift; + $self->{$method} = $_[0] if @_; + return $self->{$method}; + } + } + + sub _create { + my $class = shift; + my %hash = @_; + + my $args = check( $Tmpl, \%hash ) or return; + + bless $args, $class; + + if( lc($args->scheme) ne 'file' and not $args->host ) { + return File::Fetch->_error(loc( + "Hostname required when fetching from '%1'",$args->scheme)); + } + + for (qw[path file]) { + unless( $args->$_ ) { + return File::Fetch->_error(loc("No '%1' specified",$_)); + } + } + + return $args; + } +} + +=item $ff->output_file + +The name of the output file. This is the same as $ff->file, +but any query parameters are stripped off. For example: + + http://example.com/index.html?x=y + +would make the output file be C rather than +C. + +=back + +=cut + +sub output_file { + my $self = shift; + my $file = $self->file; + + $file =~ s/\?.*$//g; + + return $file; +} + +### XXX do this or just point to URI::Escape? +# =head2 $esc_uri = $ff->escaped_uri +# +# =cut +# +# ### most of this is stolen straight from URI::escape +# { ### Build a char->hex map +# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255; +# +# sub escaped_uri { +# my $self = shift; +# my $uri = $self->uri; +# +# ### Default unsafe characters. RFC 2732 ^(uric - reserved) +# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/ +# $escapes{$1} || $self->_fail_hi($1)/ge; +# +# return $uri; +# } +# +# sub _fail_hi { +# my $self = shift; +# my $char = shift; +# +# $self->_error(loc( +# "Can't escape '%1', try using the '%2' module instead", +# sprintf("\\x{%04X}", ord($char)), 'URI::Escape' +# )); +# } +# +# sub output_file { +# +# } +# +# +# } + =head1 METHODS =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' ); @@ -112,11 +252,11 @@ sub new { my $href = __PACKAGE__->_parse_uri( $uri ) or return; ### make it into a FFI object ### - my $ffi = File::Fetch::Item->new( %$href ) or return; + my $ff = File::Fetch->_create( %$href ) or return; ### return the object ### - return $ffi; + return $ff; } ### parses an uri to a hash structure: @@ -208,7 +348,17 @@ sub fetch { ### method is known to fail ### next if $METHOD_FAIL->{$method}; - if(my $file = $self->$sub(to=>File::Spec->catfile($to,$self->file))){ + ### there's serious issues with IPC::Run and quoting of command + ### line arguments. using quotes in the wrong place breaks things, + ### and in the case of say, + ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document + ### "index.html" "http://www.cpan.org/index.html?q=1&y=2" + ### it doesn't matter how you quote, it always fails. + local $IPC::Cmd::USE_IPC_RUN = 0; + + if( my $file = $self->$sub( + to => File::Spec->catfile( $to, $self->output_file ) + )){ unless( -e $file && -s _ ) { $self->_error(loc("'%1' said it fetched '%2', ". @@ -233,37 +383,6 @@ sub fetch { return; } -=head1 ACCESSORS - -A C object has the following accessors - -=over 4 - -=item $ff->uri - -The uri you passed to the constructor - -=item $ff->scheme - -The scheme from the uri (like 'file', 'http', etc) - -=item $ff->host - -The hostname in the uri, will be empty for a 'file' scheme. - -=item $ff->path - -The path from the uri, will be at least a single '/'. - -=item $ff->file - -The name of the remote file. Will be used as the name for the local -file as well. - -=back - -=cut - ######################## ### _*_fetch methods ### ######################## @@ -404,11 +523,18 @@ sub _wget_fetch { push @$cmd, '--passive-ftp' if $FTP_PASSIVE; ### set the output document, add the uri ### - push @$cmd, '--output-document', $to, $self->uri; + push @$cmd, '--output-document', + ### DO NOT quote things for IPC::Run, it breaks stuff. + $IPC::Cmd::USE_IPC_RUN + ? ($to, $self->uri) + : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); ### shell out ### my $captured; - unless( run( command => $cmd, buffer => \$captured, verbose => 0 ) ) { + unless(run( command => $cmd, + buffer => \$captured, + verbose => $DEBUG + )) { ### wget creates the output document always, even if the fetch ### fails.. so unlink it in that case 1 while unlink $to; @@ -436,7 +562,7 @@ sub _ftp_fetch { }; check( $tmpl, \%hash ) or return; - ### see if we have a wget binary ### + ### see if we have a ftp binary ### if( my $ftp = can_run('ftp') ) { my $fh = FileHandle->new; @@ -454,7 +580,7 @@ sub _ftp_fetch { "cd /", "cd " . $self->path, "binary", - "get " . $self->file . " " . $self->file, + "get " . $self->file . " " . $self->output_file, "quit", ); @@ -477,9 +603,16 @@ sub _lynx_fetch { }; check( $tmpl, \%hash ) or return; - ### see if we have a wget binary ### + ### see if we have a lynx binary ### if( my $lynx = can_run('lynx') ) { + unless( IPC::Cmd->can_capture_buffer ) { + $METHOD_FAIL->{'lynx'} = 1; + + return $self->_error(loc( + "Can not capture buffers. Can not use '%1' to fetch files", + 'lynx' )); + } ### write to the output file ourselves, since lynx ass_u_mes to much my $local = FileHandle->new(">$to") @@ -495,7 +628,11 @@ sub _lynx_fetch { push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; - push @$cmd, $self->uri; + ### DO NOT quote things for IPC::Run, it breaks stuff. + push @$cmd, $IPC::Cmd::USE_IPC_RUN + ? $self->uri + : QUOTE. $self->uri .QUOTE; + ### shell out ### my $captured; @@ -540,7 +677,7 @@ sub _ncftp_fetch { ### if $FTP_PASSIVE is set return if $FTP_PASSIVE; - ### see if we have a wget binary ### + ### see if we have a ncftp binary ### if( my $ncftp = can_run('ncftp') ) { my $cmd = [ @@ -550,7 +687,12 @@ sub _ncftp_fetch { $self->host, # hostname dirname($to), # local dir for the file # remote path to the file - File::Spec::Unix->catdir( $self->path, $self->file ), + ### DO NOT quote things for IPC::Run, it breaks stuff. + $IPC::Cmd::USE_IPC_RUN + ? File::Spec::Unix->catdir( $self->path, $self->file ) + : QUOTE. File::Spec::Unix->catdir( + $self->path, $self->file ) .QUOTE + ]; ### shell out ### @@ -597,7 +739,11 @@ sub _curl_fetch { ### curl doesn't follow 302 (temporarily moved) etc automatically ### so we add --location to enable that. - push @$cmd, '--fail', '--location', '--output', $to, $self->uri; + push @$cmd, '--fail', '--location', '--output', + ### DO NOT quote things for IPC::Run, it breaks stuff. + $IPC::Cmd::USE_IPC_RUN + ? ($to, $self->uri) + : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); my $captured; unless(run( command => $cmd, @@ -671,7 +817,10 @@ sub _rsync_fetch { push(@$cmd, '--quiet') unless $DEBUG; - push @$cmd, $self->uri, $to; + ### DO NOT quote things for IPC::Run, it breaks stuff. + push @$cmd, $IPC::Cmd::USE_IPC_RUN + ? ($self->uri, $to) + : (QUOTE. $self->uri .QUOTE, QUOTE. $to .QUOTE); my $captured; unless(run( command => $cmd, @@ -705,31 +854,25 @@ Pass it a true value to get the C output instead. =cut -### Error handling, the way Archive::Tar does it ### -{ - my $error = ''; - my $longmess = ''; - - sub _error { - my $self = shift; - $error = shift; - $longmess = Carp::longmess($error); - - ### set Archive::Tar::WARN to 0 to disable printing - ### of errors - if( $WARN ) { - carp $DEBUG ? $longmess : $error; - } - - return; +### error handling the way Archive::Extract does it +sub _error { + my $self = shift; + my $error = shift; + + $self->_error_msg( $error ); + $self->_error_msg_long( Carp::longmess($error) ); + + if( $WARN ) { + carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; } - sub error { - my $self = shift; - return shift() ? $longmess : $error; - } + return; } +sub error { + my $self = shift; + return shift() ? $self->_error_msg_long : $self->_error_msg; +} 1; @@ -888,6 +1031,20 @@ between a 'successfull' fetch and a custom error page. Therefor, we recommend to only use C as a last resort. This is why it is at the back of our list of methods to try as well. +=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do? + +C is relatively smart about things. When trying to write +a file to disk, it removes the C (see the +C method for details) from the file name before creating +it. In most cases this suffices. + +If you have any other characters you need to escape, please install +the C module from CPAN, and pre-encode your URI before +passing it to C. You can read about the details of URIs +and URI encoding here: + + http://www.faqs.org/rfcs/rfc2396.html + =head1 TODO =over 4 @@ -898,14 +1055,12 @@ To indicate to rather use commandline tools than modules =head1 AUTHORS -This module by -Jos Boumans Ekane@cpan.orgE. +This module by Jos Boumans Ekane@cpan.orgE. =head1 COPYRIGHT -This module is -copyright (c) 2003 Jos Boumans Ekane@cpan.orgE. -All rights reserved. +This module is copyright (c) 2003-2007 Jos Boumans +Ekane@cpan.orgE. All rights reserved. This library is free software; you may redistribute and/or modify it under the same diff --git a/lib/File/Fetch/Item.pm b/lib/File/Fetch/Item.pm deleted file mode 100644 index 47cc1e8..0000000 --- a/lib/File/Fetch/Item.pm +++ /dev/null @@ -1,52 +0,0 @@ -package File::Fetch::Item; - -use strict; -use base 'File::Fetch'; - -use Params::Check qw[check]; -use Locale::Maketext::Simple Style => 'gettext'; - -$Params::Check::VERBOSE = 1; - -### template for new() and autogenerated accessors ### -my $Tmpl = { - scheme => { default => 'http' }, - host => { default => 'localhost' }, - path => { default => '/' }, - file => { required => 1 }, - uri => { required => 1 }, -}; - -for my $method ( keys %$Tmpl ) { - no strict 'refs'; - *$method = sub { - my $self = shift; - $self->{$method} = $_[0] if @_; - return $self->{$method}; - } -} - -sub new { - my $class = shift; - my %hash = @_; - - my $args = check( $Tmpl, \%hash ) or return; - - bless $args, $class; - - if( lc($args->scheme) ne 'file' and not $args->host ) { - return File::Fetch->_error(loc( - "Hostname required when fetching from '%1'",$args->scheme)); - } - - for (qw[path file]) { - unless( $args->$_ ) { - return File::Fetch->_error(loc("No '%1' specified",$_)); - } - } - - return $args; -} - - -1; diff --git a/lib/File/Fetch/t/01_File-Fetch.t b/lib/File/Fetch/t/01_File-Fetch.t index c7cbd8b..0c47c32 100644 --- a/lib/File/Fetch/t/01_File-Fetch.t +++ b/lib/File/Fetch/t/01_File-Fetch.t @@ -9,6 +9,12 @@ use Cwd qw[cwd]; use File::Basename qw[basename]; use Data::Dumper; +use_ok('File::Fetch'); + +### optionally set debugging ### +$File::Fetch::DEBUG = $File::Fetch::DEBUG = 1 if $ARGV[0]; +$IPC::Cmd::DEBUG = $IPC::Cmd::DEBUG = 1 if $ARGV[0]; + unless( $ENV{PERL_CORE} ) { warn qq[ @@ -23,14 +29,21 @@ to no fault of the module itself. ]; - sleep 3; + sleep 3 unless $File::Fetch::DEBUG; } -use_ok('File::Fetch'); -use_ok('File::Fetch::Item'); - -### optionally set debugging ### -$File::Fetch::DEBUG = $File::Fetch::DEBUG = 1 if $ARGV[0]; +### show us the tools IPC::Cmd will use to run binary programs +if( $File::Fetch::DEBUG ) { + ### stupid 'used only once' warnings ;( + diag( "IPC::Run enabled: " . + $IPC::Cmd::USE_IPC_RUN || $IPC::Cmd::USE_IPC_RUN ); + diag( "IPC::Run available: " . IPC::Cmd->can_use_ipc_run ); + diag( "IPC::Run vesion: $IPC::Run::VERSION" ); + diag( "IPC::Open3 enabled: " . + $IPC::Cmd::USE_IPC_OPEN3 || $IPC::Cmd::USE_IPC_OPEN3 ); + diag( "IPC::Open3 available: " . IPC::Cmd->can_use_ipc_open3 ); + diag( "IPC::Open3 vesion: $IPC::Open3::VERSION" ); +} ### _parse_uri tests my $map = [ @@ -47,10 +60,10 @@ my $map = [ file => 'foo.txt', }, { uri => 'rsync://cpan.pair.com/CPAN/MIRRORING.FROM', - scheme => 'rsync', - host => 'cpan.pair.com', - path => '/CPAN/', - file => 'MIRRORING.FROM', + scheme => 'rsync', + host => 'cpan.pair.com', + path => '/CPAN/', + file => 'MIRRORING.FROM', }, ]; @@ -67,23 +80,17 @@ for my $entry (@$map ) { } } -### File::Fetch::Item tests ### +### File::Fetch->new tests ### for my $entry (@$map) { - my $ffi = File::Fetch::Item->new( %$entry ); - isa_ok( $ffi, 'File::Fetch::Item' ); + my $ff = File::Fetch->new( uri => $entry->{uri} ); + isa_ok( $ff, "File::Fetch" ); for my $acc ( keys %$entry ) { - is( $ffi->$acc(), $entry->{$acc}, + is( $ff->$acc(), $entry->{$acc}, " Accessor '$acc' ok" ); } } -### File::Fetch->new tests ### -for my $entry (@$map) { - my $ff = File::Fetch->new( uri => $entry->{uri} ); - isa_ok( $ff, "File::Fetch::Item" ); -} - ### fetch() tests ### ### file:// tests ### @@ -109,10 +116,12 @@ for my $entry (@$map) { } ### http:// tests ### -{ my $uri = 'http://www.cpan.org/index.html'; - - for (qw[lwp wget curl lynx]) { - _fetch_uri( http => $uri, $_ ); +{ for my $uri ( 'http://www.cpan.org/index.html', + 'http://www.cpan.org/index.html?q=1&y=2' + ) { + for (qw[lwp wget curl lynx]) { + _fetch_uri( http => $uri, $_ ); + } } } @@ -130,7 +139,7 @@ sub _fetch_uri { my $method = shift or return; SKIP: { - skip "'$method' fetching tests disabled under perl core", 3 + skip "'$method' fetching tests disabled under perl core", 4 if $ENV{PERL_CORE}; ### stupid warnings ### @@ -144,12 +153,14 @@ sub _fetch_uri { my $file = $ff->fetch( to => 'tmp' ); SKIP: { - skip "You do not have '$method' installed", 2 + skip "You do not have '$method' installed/available", 3 if $File::Fetch::METHOD_FAIL->{$method} && $File::Fetch::METHOD_FAIL->{$method}; ok( $file, " File ($file) fetched using $method" ); - ok( -s $file, " File ($file) has size" ); + ok( -s $file, " File has size" ); + is( basename($file), $ff->output_file, + " File has expected name" ); unlink $file; } -- 2.7.4