use File::Copy;
use File::Spec;
use File::Spec::Unix;
-use File::Fetch::Item;
use File::Basename qw[dirname];
use Cwd qw[cwd];
$FTP_PASSIVE $TIMEOUT $DEBUG $WARN
];
-$VERSION = 0.08;
+use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] };
+
+
+$VERSION = '0.10';
$PREFER_BIN = 0; # XXX TODO implement
$FROM_EMAIL = 'File-Fetch@example.com';
$USER_AGENT = 'File::Fetch/$VERSION';
See the C<HOW IT WORKS> section further down for details.
+=head1 ACCESSORS
+
+A C<File::Fetch> object has the following accessors
+
+=over 4
+
+=item $ff->uri
+
+The uri you passed to the constructor
+
+=item $ff->scheme
+
+The scheme from the uri (like 'file', 'http', etc)
+
+=item $ff->host
+
+The hostname in the uri, will be empty for a 'file' scheme.
+
+=item $ff->path
+
+The path from the uri, will be at least a single '/'.
+
+=item $ff->file
+
+The name of the remote file. For the local file name, the
+result of $ff->output_file will be used.
+
+=cut
+
+
+##########################
+### Object & Accessors ###
+##########################
+
+{
+ ### template for new() and autogenerated accessors ###
+ my $Tmpl = {
+ scheme => { default => 'http' },
+ host => { default => 'localhost' },
+ path => { default => '/' },
+ file => { required => 1 },
+ uri => { required => 1 },
+ _error_msg => { no_override => 1 },
+ _error_msg_long => { no_override => 1 },
+ };
+
+ for my $method ( keys %$Tmpl ) {
+ no strict 'refs';
+ *$method = sub {
+ my $self = shift;
+ $self->{$method} = $_[0] if @_;
+ return $self->{$method};
+ }
+ }
+
+ sub _create {
+ my $class = shift;
+ my %hash = @_;
+
+ my $args = check( $Tmpl, \%hash ) or return;
+
+ bless $args, $class;
+
+ if( lc($args->scheme) ne 'file' and not $args->host ) {
+ return File::Fetch->_error(loc(
+ "Hostname required when fetching from '%1'",$args->scheme));
+ }
+
+ for (qw[path file]) {
+ unless( $args->$_ ) {
+ return File::Fetch->_error(loc("No '%1' specified",$_));
+ }
+ }
+
+ return $args;
+ }
+}
+
+=item $ff->output_file
+
+The name of the output file. This is the same as $ff->file,
+but any query parameters are stripped off. For example:
+
+ http://example.com/index.html?x=y
+
+would make the output file be C<index.html> rather than
+C<index.html?x=y>.
+
+=back
+
+=cut
+
+sub output_file {
+ my $self = shift;
+ my $file = $self->file;
+
+ $file =~ s/\?.*$//g;
+
+ return $file;
+}
+
+### XXX do this or just point to URI::Escape?
+# =head2 $esc_uri = $ff->escaped_uri
+#
+# =cut
+#
+# ### most of this is stolen straight from URI::escape
+# { ### Build a char->hex map
+# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
+#
+# sub escaped_uri {
+# my $self = shift;
+# my $uri = $self->uri;
+#
+# ### Default unsafe characters. RFC 2732 ^(uric - reserved)
+# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
+# $escapes{$1} || $self->_fail_hi($1)/ge;
+#
+# return $uri;
+# }
+#
+# sub _fail_hi {
+# my $self = shift;
+# my $char = shift;
+#
+# $self->_error(loc(
+# "Can't escape '%1', try using the '%2' module instead",
+# sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
+# ));
+# }
+#
+# sub output_file {
+#
+# }
+#
+#
+# }
+
=head1 METHODS
=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
my $href = __PACKAGE__->_parse_uri( $uri ) or return;
### make it into a FFI object ###
- my $ffi = File::Fetch::Item->new( %$href ) or return;
+ my $ff = File::Fetch->_create( %$href ) or return;
### return the object ###
- return $ffi;
+ return $ff;
}
### parses an uri to a hash structure:
### method is known to fail ###
next if $METHOD_FAIL->{$method};
- if(my $file = $self->$sub(to=>File::Spec->catfile($to,$self->file))){
+ ### there's serious issues with IPC::Run and quoting of command
+ ### line arguments. using quotes in the wrong place breaks things,
+ ### and in the case of say,
+ ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
+ ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
+ ### it doesn't matter how you quote, it always fails.
+ local $IPC::Cmd::USE_IPC_RUN = 0;
+
+ if( my $file = $self->$sub(
+ to => File::Spec->catfile( $to, $self->output_file )
+ )){
unless( -e $file && -s _ ) {
$self->_error(loc("'%1' said it fetched '%2', ".
return;
}
-=head1 ACCESSORS
-
-A C<File::Fetch> object has the following accessors
-
-=over 4
-
-=item $ff->uri
-
-The uri you passed to the constructor
-
-=item $ff->scheme
-
-The scheme from the uri (like 'file', 'http', etc)
-
-=item $ff->host
-
-The hostname in the uri, will be empty for a 'file' scheme.
-
-=item $ff->path
-
-The path from the uri, will be at least a single '/'.
-
-=item $ff->file
-
-The name of the remote file. Will be used as the name for the local
-file as well.
-
-=back
-
-=cut
-
########################
### _*_fetch methods ###
########################
push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
### set the output document, add the uri ###
- push @$cmd, '--output-document', $to, $self->uri;
+ push @$cmd, '--output-document',
+ ### DO NOT quote things for IPC::Run, it breaks stuff.
+ $IPC::Cmd::USE_IPC_RUN
+ ? ($to, $self->uri)
+ : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
### shell out ###
my $captured;
- unless( run( command => $cmd, buffer => \$captured, verbose => 0 ) ) {
+ unless(run( command => $cmd,
+ buffer => \$captured,
+ verbose => $DEBUG
+ )) {
### wget creates the output document always, even if the fetch
### fails.. so unlink it in that case
1 while unlink $to;
};
check( $tmpl, \%hash ) or return;
- ### see if we have a wget binary ###
+ ### see if we have a ftp binary ###
if( my $ftp = can_run('ftp') ) {
my $fh = FileHandle->new;
"cd /",
"cd " . $self->path,
"binary",
- "get " . $self->file . " " . $self->file,
+ "get " . $self->file . " " . $self->output_file,
"quit",
);
};
check( $tmpl, \%hash ) or return;
- ### see if we have a wget binary ###
+ ### see if we have a lynx binary ###
if( my $lynx = can_run('lynx') ) {
+ unless( IPC::Cmd->can_capture_buffer ) {
+ $METHOD_FAIL->{'lynx'} = 1;
+
+ return $self->_error(loc(
+ "Can not capture buffers. Can not use '%1' to fetch files",
+ 'lynx' ));
+ }
### write to the output file ourselves, since lynx ass_u_mes to much
my $local = FileHandle->new(">$to")
push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
- push @$cmd, $self->uri;
+ ### DO NOT quote things for IPC::Run, it breaks stuff.
+ push @$cmd, $IPC::Cmd::USE_IPC_RUN
+ ? $self->uri
+ : QUOTE. $self->uri .QUOTE;
+
### shell out ###
my $captured;
### if $FTP_PASSIVE is set
return if $FTP_PASSIVE;
- ### see if we have a wget binary ###
+ ### see if we have a ncftp binary ###
if( my $ncftp = can_run('ncftp') ) {
my $cmd = [
$self->host, # hostname
dirname($to), # local dir for the file
# remote path to the file
- File::Spec::Unix->catdir( $self->path, $self->file ),
+ ### DO NOT quote things for IPC::Run, it breaks stuff.
+ $IPC::Cmd::USE_IPC_RUN
+ ? File::Spec::Unix->catdir( $self->path, $self->file )
+ : QUOTE. File::Spec::Unix->catdir(
+ $self->path, $self->file ) .QUOTE
+
];
### shell out ###
### curl doesn't follow 302 (temporarily moved) etc automatically
### so we add --location to enable that.
- push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
+ push @$cmd, '--fail', '--location', '--output',
+ ### DO NOT quote things for IPC::Run, it breaks stuff.
+ $IPC::Cmd::USE_IPC_RUN
+ ? ($to, $self->uri)
+ : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
my $captured;
unless(run( command => $cmd,
push(@$cmd, '--quiet') unless $DEBUG;
- push @$cmd, $self->uri, $to;
+ ### DO NOT quote things for IPC::Run, it breaks stuff.
+ push @$cmd, $IPC::Cmd::USE_IPC_RUN
+ ? ($self->uri, $to)
+ : (QUOTE. $self->uri .QUOTE, QUOTE. $to .QUOTE);
my $captured;
unless(run( command => $cmd,
=cut
-### Error handling, the way Archive::Tar does it ###
-{
- my $error = '';
- my $longmess = '';
-
- sub _error {
- my $self = shift;
- $error = shift;
- $longmess = Carp::longmess($error);
-
- ### set Archive::Tar::WARN to 0 to disable printing
- ### of errors
- if( $WARN ) {
- carp $DEBUG ? $longmess : $error;
- }
-
- return;
+### error handling the way Archive::Extract does it
+sub _error {
+ my $self = shift;
+ my $error = shift;
+
+ $self->_error_msg( $error );
+ $self->_error_msg_long( Carp::longmess($error) );
+
+ if( $WARN ) {
+ carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
}
- sub error {
- my $self = shift;
- return shift() ? $longmess : $error;
- }
+ return;
}
+sub error {
+ my $self = shift;
+ return shift() ? $self->_error_msg_long : $self->_error_msg;
+}
1;
Therefor, we recommend to only use C<lynx> as a last resort. This is
why it is at the back of our list of methods to try as well.
+=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
+
+C<File::Fetch> is relatively smart about things. When trying to write
+a file to disk, it removes the C<query parameters> (see the
+C<output_file> method for details) from the file name before creating
+it. In most cases this suffices.
+
+If you have any other characters you need to escape, please install
+the C<URI::Escape> module from CPAN, and pre-encode your URI before
+passing it to C<File::Fetch>. You can read about the details of URIs
+and URI encoding here:
+
+ http://www.faqs.org/rfcs/rfc2396.html
+
=head1 TODO
=over 4
=head1 AUTHORS
-This module by
-Jos Boumans E<lt>kane@cpan.orgE<gt>.
+This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
=head1 COPYRIGHT
-This module is
-copyright (c) 2003 Jos Boumans E<lt>kane@cpan.orgE<gt>.
-All rights reserved.
+This module is copyright (c) 2003-2007 Jos Boumans
+E<lt>kane@cpan.orgE<gt>. All rights reserved.
This library is free software;
you may redistribute and/or modify it under the same
use File::Basename qw[basename];
use Data::Dumper;
+use_ok('File::Fetch');
+
+### optionally set debugging ###
+$File::Fetch::DEBUG = $File::Fetch::DEBUG = 1 if $ARGV[0];
+$IPC::Cmd::DEBUG = $IPC::Cmd::DEBUG = 1 if $ARGV[0];
+
unless( $ENV{PERL_CORE} ) {
warn qq[
];
- sleep 3;
+ sleep 3 unless $File::Fetch::DEBUG;
}
-use_ok('File::Fetch');
-use_ok('File::Fetch::Item');
-
-### optionally set debugging ###
-$File::Fetch::DEBUG = $File::Fetch::DEBUG = 1 if $ARGV[0];
+### show us the tools IPC::Cmd will use to run binary programs
+if( $File::Fetch::DEBUG ) {
+ ### stupid 'used only once' warnings ;(
+ diag( "IPC::Run enabled: " .
+ $IPC::Cmd::USE_IPC_RUN || $IPC::Cmd::USE_IPC_RUN );
+ diag( "IPC::Run available: " . IPC::Cmd->can_use_ipc_run );
+ diag( "IPC::Run vesion: $IPC::Run::VERSION" );
+ diag( "IPC::Open3 enabled: " .
+ $IPC::Cmd::USE_IPC_OPEN3 || $IPC::Cmd::USE_IPC_OPEN3 );
+ diag( "IPC::Open3 available: " . IPC::Cmd->can_use_ipc_open3 );
+ diag( "IPC::Open3 vesion: $IPC::Open3::VERSION" );
+}
### _parse_uri tests
my $map = [
file => 'foo.txt',
},
{ uri => 'rsync://cpan.pair.com/CPAN/MIRRORING.FROM',
- scheme => 'rsync',
- host => 'cpan.pair.com',
- path => '/CPAN/',
- file => 'MIRRORING.FROM',
+ scheme => 'rsync',
+ host => 'cpan.pair.com',
+ path => '/CPAN/',
+ file => 'MIRRORING.FROM',
},
];
}
}
-### File::Fetch::Item tests ###
+### File::Fetch->new tests ###
for my $entry (@$map) {
- my $ffi = File::Fetch::Item->new( %$entry );
- isa_ok( $ffi, 'File::Fetch::Item' );
+ my $ff = File::Fetch->new( uri => $entry->{uri} );
+ isa_ok( $ff, "File::Fetch" );
for my $acc ( keys %$entry ) {
- is( $ffi->$acc(), $entry->{$acc},
+ is( $ff->$acc(), $entry->{$acc},
" Accessor '$acc' ok" );
}
}
-### File::Fetch->new tests ###
-for my $entry (@$map) {
- my $ff = File::Fetch->new( uri => $entry->{uri} );
- isa_ok( $ff, "File::Fetch::Item" );
-}
-
### fetch() tests ###
### file:// tests ###
}
### http:// tests ###
-{ my $uri = 'http://www.cpan.org/index.html';
-
- for (qw[lwp wget curl lynx]) {
- _fetch_uri( http => $uri, $_ );
+{ for my $uri ( 'http://www.cpan.org/index.html',
+ 'http://www.cpan.org/index.html?q=1&y=2'
+ ) {
+ for (qw[lwp wget curl lynx]) {
+ _fetch_uri( http => $uri, $_ );
+ }
}
}
my $method = shift or return;
SKIP: {
- skip "'$method' fetching tests disabled under perl core", 3
+ skip "'$method' fetching tests disabled under perl core", 4
if $ENV{PERL_CORE};
### stupid warnings ###
my $file = $ff->fetch( to => 'tmp' );
SKIP: {
- skip "You do not have '$method' installed", 2
+ skip "You do not have '$method' installed/available", 3
if $File::Fetch::METHOD_FAIL->{$method} &&
$File::Fetch::METHOD_FAIL->{$method};
ok( $file, " File ($file) fetched using $method" );
- ok( -s $file, " File ($file) has size" );
+ ok( -s $file, " File has size" );
+ is( basename($file), $ff->output_file,
+ " File has expected name" );
unlink $file;
}