From 01d11a1c3434e258d1f14bee19740fae98a6cd1b Mon Sep 17 00:00:00 2001 From: Steve Peters Date: Wed, 15 Aug 2007 14:58:57 +0000 Subject: [PATCH] Upgrade to Archive-Tar-1.34. Omitted re-addition of the Pod tests. p4raw-id: //depot/perl@31722 --- MANIFEST | 6 ++-- lib/Archive/Tar.pm | 58 ++++++++++++++++++++++++++++-- lib/Archive/Tar/File.pm | 18 +++++++--- lib/Archive/Tar/t/03_file.t | 4 +++ lib/Archive/Tar/t/src/long/bar.tar.packed | 2 +- lib/Archive/Tar/t/src/long/foo.tgz.packed | 2 +- lib/Archive/Tar/t/src/short/bar.tar.packed | 2 +- lib/Archive/Tar/t/src/short/foo.tgz.packed | 2 +- 8 files changed, 79 insertions(+), 15 deletions(-) diff --git a/MANIFEST b/MANIFEST index f07eb91..b4d5ea0 100644 --- a/MANIFEST +++ b/MANIFEST @@ -389,8 +389,8 @@ ext/Digest/SHA/t/base64.t See if Digest::SHA works ext/Digest/SHA/t/bitbuf.t See if Digest::SHA works ext/Digest/SHA/t/dumpload.t See if Digest::SHA works ext/Digest/SHA/t/fips198.t See if Digest::SHA works -ext/Digest/SHA/t/gg.t See if Digest::SHA works ext/Digest/SHA/t/gglong.t See if Digest::SHA works +ext/Digest/SHA/t/gg.t See if Digest::SHA works ext/Digest/SHA/t/hmacsha.t See if Digest::SHA works ext/Digest/SHA/t/ireland.t See if Digest::SHA works ext/Digest/SHA/t/methods.t See if Digest::SHA works @@ -2763,8 +2763,8 @@ lib/Tie/File/t/40_abs_cache.t Unit tests for Tie::File::Cache lib/Tie/File/t/41_heap.t Unit tests for Tie::File::Heap lib/Tie/File/t/42_offset.t Unit tests for the offset method lib/Tie/Handle.pm Base class for tied handles -lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle lib/Tie/Handle/stdhandle_from_handle.t Test for Tie::StdHandle/Handle backwards compat +lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle lib/Tie/Hash/NamedCapture.pm Implements %- and %+ behaviour lib/Tie/Hash.pm Base class for tied hashes lib/Tie/Memoize.pm Base class for memoized tied hashes @@ -3874,8 +3874,8 @@ t/run/switchp.t Test the -p switch t/run/switchPx.aux Data for switchPx.t t/run/switchPx.t Test the -Px combination t/run/switcht.t Test the -t switch -t/run/switchx.aux Data for switchx.t t/run/switchx2.aux Data for switchx.t +t/run/switchx.aux Data for switchx.t t/run/switchx.t Test the -x switch t/TEST The regression tester t/TestInit.pm Preamble library for core tests diff --git a/lib/Archive/Tar.pm b/lib/Archive/Tar.pm index fe0d0f8..34792e9 100644 --- a/lib/Archive/Tar.pm +++ b/lib/Archive/Tar.pm @@ -14,7 +14,7 @@ use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD $DEBUG = 0; $WARN = 1; $FOLLOW_SYMLINK = 0; -$VERSION = "1.32"; +$VERSION = "1.34"; $CHOWN = 1; $CHMOD = 1; $DO_NOT_USE_PREFIX = 0; @@ -406,7 +406,9 @@ underlying file. sub contains_file { my $self = shift; - my $full = shift or return; + my $full = shift; + + return unless defined $full; ### don't warn if the entry isn't there.. that's what this function ### is for after all. @@ -509,7 +511,7 @@ Returns true on success, false on failure. sub extract_file { my $self = shift; - my $file = shift or return; + my $file = shift; return unless defined $file; my $alt = shift; my $entry = $self->_find_entry( $file ) @@ -1669,6 +1671,56 @@ write a C<.tar.Z> file $tar->write($fh); $fh->close ; +=item How do I handle Unicode strings? + +C uses byte semantics for any files it reads from or writes +to disk. This is not a problem if you only deal with files and never +look at their content or work solely with byte strings. But if you use +Unicode strings with character semantics, some additional steps need +to be taken. + +For example, if you add a Unicode string like + + # Problem + $tar->add_data('file.txt', "Euro: \x{20AC}"); + +then there will be a problem later when the tarfile gets written out +to disk via C<$tar->write()>: + + Wide character in print at .../Archive/Tar.pm line 1014. + +The data was added as a Unicode string and when writing it out to disk, +the C<:utf8> line discipline wasn't set by C, so Perl +tried to convert the string to ISO-8859 and failed. The written file +now contains garbage. + +For this reason, Unicode strings need to be converted to UTF-8-encoded +bytestrings before they are handed off to C: + + use Encode; + my $data = "Accented character: \x{20AC}"; + $data = encode('utf8', $data); + + $tar->add_data('file.txt', $data); + +A opposite problem occurs if you extract a UTF8-encoded file from a +tarball. Using C on the C object +will return its content as a bytestring, not as a Unicode string. + +If you want it to be a Unicode string (because you want character +semantics with operations like regular expression matching), you need +to decode the UTF8-encoded content and have Perl convert it into +a Unicode string: + + use Encode; + my $data = $tar->get_content(); + + # Make it a Unicode string + $data = decode('utf8', $data); + +There is no easy way to provide this functionality in C, +because a tarball can contain many files, and each of which could be +encoded in a different way. =back diff --git a/lib/Archive/Tar/File.pm b/lib/Archive/Tar/File.pm index fe5480d..8c96577 100644 --- a/lib/Archive/Tar/File.pm +++ b/lib/Archive/Tar/File.pm @@ -200,7 +200,7 @@ sub clone { sub _new_from_chunk { my $class = shift; - my $chunk = shift or return; + my $chunk = shift or return; # 512 bytes of tar header my %hash = @_; ### filter any arguments on defined-ness of values. @@ -233,7 +233,11 @@ sub _new_from_chunk { sub _new_from_file { my $class = shift; - my $path = shift or return; + my $path = shift; + + ### path has to at least exist + return unless defined $path; + my $type = __PACKAGE__->_filetype($path); my $data = ''; @@ -304,7 +308,7 @@ sub _new_from_file { sub _new_from_data { my $class = shift; - my $path = shift or return; + my $path = shift; return unless defined $path; my $data = shift; return unless defined $data; my $opt = shift; @@ -371,7 +375,9 @@ sub _prefix_and_file { sub _filetype { my $self = shift; - my $file = shift or return; + my $file = shift; + + return unless defined $file; return SYMLINK if (-l $file); # Symlink @@ -515,7 +521,9 @@ Returns true on success and false on failure. sub rename { my $self = shift; - my $path = shift or return; + my $path = shift; + + return unless defined $path; my ($prefix,$file) = $self->_prefix_and_file( $path ); diff --git a/lib/Archive/Tar/t/03_file.t b/lib/Archive/Tar/t/03_file.t index 9d4e755..33c1cf2 100644 --- a/lib/Archive/Tar/t/03_file.t +++ b/lib/Archive/Tar/t/03_file.t @@ -20,6 +20,10 @@ my @test_files = ( [ 'x/bIn1', $all_chars ], [ 'bIn2', $all_chars x 2 ], [ 'bIn0', '' ], + + ### we didnt handle 'false' filenames very well across A::T as of version + ### 1.32, as reported in #28687. Test for the handling of such files here. + [ 0, '', ], ### keep this one as the last entry [ 'x/yy/z', '', { type => DIR, diff --git a/lib/Archive/Tar/t/src/long/bar.tar.packed b/lib/Archive/Tar/t/src/long/bar.tar.packed index 7a4a2d4..2e86525 100644 --- a/lib/Archive/Tar/t/src/long/bar.tar.packed +++ b/lib/Archive/Tar/t/src/long/bar.tar.packed @@ -10,7 +10,7 @@ To recreate it use the following command: uupacktool.pl -p lib/Archive/Tar/t/src/long/bar.tar lib/Archive/Tar/t/src/long/bar.tar.packed -Created at Thu May 24 15:38:19 2007 +Created at Wed Aug 15 15:56:07 2007 ######################################################################### __UU__ M8P`````````````````````````````````````````````````````````` diff --git a/lib/Archive/Tar/t/src/long/foo.tgz.packed b/lib/Archive/Tar/t/src/long/foo.tgz.packed index f9464f7..8ea6684 100644 --- a/lib/Archive/Tar/t/src/long/foo.tgz.packed +++ b/lib/Archive/Tar/t/src/long/foo.tgz.packed @@ -10,7 +10,7 @@ To recreate it use the following command: uupacktool.pl -p lib/Archive/Tar/t/src/long/foo.tgz lib/Archive/Tar/t/src/long/foo.tgz.packed -Created at Thu May 24 15:38:19 2007 +Created at Wed Aug 15 15:56:07 2007 ######################################################################### __UU__ M'XL(`````````^W72VZ#,!`&8*]S"BY`F,$/MCT`ET")25`