From af21f63deaf607173c752c3645073795e1243bf3 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Wed, 12 May 2010 21:13:30 -0400
Subject: [PATCH] Faster generation of bzip2 deltas: Do not repeatedly decompress input file.

---
 Review note: relative to the original commit, doit_redir() now reports
 "for writing" when the output file cannot be opened, dies when the
 command exits non-zero, and carries a header comment. Hunk offsets and
 the diffstat were adjusted to match; the blob id on the pristine-bz2
 "index" line was not recomputed.

 debian/changelog |  7 +++++++
 pristine-bz2     | 55 +++++++++++++++++++++++++++++++++----------------------
 2 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index ddf2697..f74f1cc 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+pristine-tar (1.02) UNRELEASED; urgency=low
+
+  * Faster generation of bzip2 deltas: Do not repeatedly decompress
+    input file.
+
+ -- Joey Hess Wed, 12 May 2010 21:12:58 -0400
+
 pristine-tar (1.01) unstable; urgency=low
 
   * Use italics in man pages to highlight which parts of command
diff --git a/pristine-bz2 b/pristine-bz2
index cca9377..83f818f 100755
--- a/pristine-bz2
+++ b/pristine-bz2
@@ -81,6 +81,7 @@ use strict;
 use File::Temp;
 use Getopt::Long;
 use File::Basename qw/basename/;
+use IPC::Open2;
 
 use constant BZIP2_DEBUG => 1;
 
@@ -117,6 +118,19 @@ sub doit {
 	}
 }
 
+# Run @args with stdin read from the file $in and stdout written to the
+# file $out. Dies if either file cannot be opened or the command fails.
+sub doit_redir {
+	no warnings 'once';
+	my ($in, $out, @args) = @_;
+	vprint(@args, "<", $in, ">", $out);
+	open INFILE, "<", $in or die("Could not open '$in' for reading: $!\n");
+	open OUTFILE, ">", $out or die("Could not open '$out' for writing: $!\n");
+	my $pid = open2(">&OUTFILE", "<&INFILE", @args);
+	waitpid $pid, 0;
+	die("@args failed, aborting\n") if $? != 0;
+}
+
 sub tempdir {
 	return File::Temp::tempdir("pristine-bz2.XXXXXXXXXX",
 		TMPDIR => 1, CLEANUP => !$keep);
@@ -169,36 +183,27 @@ sub comparefiles {
 }
 
 sub testvariant {
-	my ($old, $new, $bzip2_program, @args) = @_;
-
-	# Unzip attempt of the previous run (or the original on the first run)
-	doit('bunzip2', $new);
-	if (-e $new) {
-		die "bunzip2 failed, aborting";
-	}
-	else {
-		$new =~ s/\.bz2$//;
-		unless (-e $new) {
-			die("bunzip2 succeeded but I can't find the new file");
-		}
-	}
+	my ($old, $tmpin, $bzip2_program, @args) = @_;
 
 	# try bzip2'ing with the arguments passed
-	doit($bzip2_program, @args, $new);
-	$new .= '.bz2';
-	unless (-e $new) {
+	doit($bzip2_program, @args, $tmpin);
+	$tmpin .= '.bz2';
+	unless (-e $tmpin) {
 		die("$bzip2_program failed, aborting");
 	}
 
 	# and compare the generated with the original
-	return !comparefiles($old, $new);
+	return !comparefiles($old, $tmpin);
 }
 
 sub reproducebzip2 {
-	my ($wd, $orig, $new) = (shift, shift, shift);
+	my ($wd, $orig) = (shift, shift);
+
+	my $tmpin="$wd/test";
+	doit_redir($orig, "$tmpin.bak", "bzip2", "-dc");
 
 	# read fields from bzip2 headers
-	my ($level) = readbzip2($new);
+	my ($level) = readbzip2($orig);
 	debug("level: $level");
 
 	# try to guess the bzip2 arguments that are needed by the
@@ -206,7 +211,14 @@ sub reproducebzip2 {
 	my @args = predictbzip2args($level);
 
 	foreach my $program (@supported_bzip2_programs) {
-		testvariant($orig, $new, $program, @args)
+		# some compressors eat the uncompressed file, some
+		# do not; restore as needed
+		if (! -e $tmpin) {
+			doit("cp", "-a", "$tmpin.bak", "$tmpin");
+
+		}
+
+		testvariant($orig, $tmpin, $program, @args)
 			&& return $program, @args;
 	}
 	# More to come?
@@ -291,9 +303,8 @@ sub gendelta {
 
 	my @files=qw(version type params program);
 
-	doit("cp", $bzip2file, "$tempdir/test.bz2");
 	my ($program, @params)=
-		reproducebzip2($tempdir, $bzip2file, "$tempdir/test.bz2");
+		reproducebzip2($tempdir, $bzip2file);
 
 	open(OUT, ">", "$tempdir/version") || die "$!";
 	print OUT "2.0\n";
-- 
2.7.4