5 pristine-gz - regenerate pristine gz files
9 B<pristine-gz> [-vdk] gendelta I<file.gz> I<delta>
11 B<pristine-gz> [-vdk] gengz I<delta> I<file>
15 This is a complement to the pristine-tar(1) command. Normally you don't
16 need to run it by hand, since pristine-tar calls it as necessary to handle
19 pristine-gz gendelta takes the specified I<gz> file, and generates a
20 small binary I<delta> file that can later be used by pristine-gz gengz
21 to recreate the original file.
23 pristine-gz gengz takes the specified I<delta> file, and compresses
24 the specified input I<file> (which must be identical to the contents
25 of the original gz file). The resulting file will be identical to
26 the original gz file used to create the delta.
28 The approach used to regenerate the original gz file is to figure out how
29 it was produced -- what compression level was used, whether it was built
30 with GNU gzip(1) or with a library or BSD version, whether the --rsyncable
31 option was used, etc, and to reproduce this build environment when
34 This approach will work for about 99.5% of cases. One example of a case it
35 cannot currently support is a gz file that has been produced by appending
36 together multiple gz files.
38 For the few where it doesn't work, a binary diff will be included in the
39 delta between the closest regneratable gz file and the original. In
40 the worst case, the diff will include the entire content of the original
41 gz file, resulting in a larger than usual delta. If the delta is much
42 larger than usual, pristine-gz will print a warning.
44 If the delta filename is "-", pristine-gz reads or writes it to stdio.
54 Verbose mode, show each command that is run.
66 Don't clean up the temporary directory on exit.
76 Specifies a location to place temporary files, other than the default.
82 Joey Hess <joeyh@debian.org>,
83 Faidon Liambotis <paravoid@debian.org>
84 Josh Triplett <josh@joshtriplett.org>
86 Licensed under the GPL, version 2.
93 use Pristine::Tar::Delta;
94 use Pristine::Tar::Formats;
95 use File::Basename qw/basename/;
102 gendelta => [\&gendelta, 2],
103 gengz => [\&gengz, 2],
108 print STDERR "Usage: pristine-gz [-vdk] gendelta file.gz delta\n";
109 print STDERR " pristine-gz [-vdk] gengz delta file\n";
113 my $filename = shift;
115 if (! is_gz($filename)) {
116 error "This is not a valid GZip archive.";
119 open(GZIP, "< $filename")
120 or die("Could not open '$filename' for reading: $!\n");
123 if (read(GZIP, $chars, 10) != 10) {
124 die("Unable to read 10 bytes from input\n");
127 my ($id1, $id2, $method, $flags, $timestamp, $level, $os, $name)
128 = (unpack("CCCb8VCC", $chars), '');
130 my @flags = split(//, $flags);
132 if ($flags[$fconstants{GZIP_FLAG_FNAME}]) {
133 # read a null-terminated string
135 while (read(GZIP, $chars, 1) == 1 && ord($chars) != 0);
139 return (\@flags, $timestamp, $level, $os, $name);
142 sub predictgzipargs {
143 my ($flags, $timestamp, $level) = @_;
147 unless ($flags[$fconstants{GZIP_FLAG_FNAME}]) {
149 push @args, '-M' if $timestamp;
152 if ($level == $fconstants{GZIP_COMPRESSION_BEST}) {
155 elsif ($level == $fconstants{GZIP_COMPRESSION_FAST}) {
163 my ($orig, $tempdir, $tempin) = @_;
164 my $tempout="$tempdir/test.gz";
165 doit_redir($orig, $tempin, "gzip", "-dc");
167 # read fields from gzip headers
168 my ($flags, $timestamp, $level, $os, $name) = readgzip($orig);
169 debug("flags: [".join(", ", @$flags).
170 "] timestamp: $timestamp level: $level os: $os name: $name");
172 # try to guess the gzip arguments that are needed by the header
174 my @args = predictgzipargs($flags, $timestamp, $level);
175 my @extraargs = ("-F", $name, "-T", $timestamp);
179 if ($os == $fconstants{GZIP_OS_UNIX}) {
180 # for 98% of the cases the simple heuristic above works
181 # and it was produced by gnu gzip.
182 push @try, ['--gnu', @args];
183 push @try, ['--gnu', @args, '--rsyncable'];
184 push @try, ['--gnu', @args, '--new-rsyncable'];
188 push @args, "--original-name", $name;
189 @extraargs = ("-T", $timestamp);
190 $name = basename($name);
193 # set the Operating System flag to the one found in the original
195 push @args, ("--osflag", $os) if $os != $fconstants{GZIP_OS_UNIX};
197 # many of the .gz out there are created using the BSD version of
198 # gzip which is using the zlib library; try with our version of
199 # bsd-gzip with added support for the undocumented GNU gzip options
203 # Perl's Compress::Raw::Zlib interfaces directly with zlib and
204 # apparently is the only implementation out there which tunes a very
205 # specific parameter of zlib, memLevel, to 9, instead of 8 which is
206 # the default. The module is used, among others, by Compress::Gzip
207 # which in turn is used by IO::Zlib. It was found on the real world on
208 # tarballs generated by Perl 5.10's Module::Build (cf. #618284)
209 push @try, [@args, '--quirk', 'perl'];
210 push @try, [@args, '--quirk', 'perl', '-1'];
212 # apparently, there is an old version of bsd-gzip (or a similar tool
213 # based on zlib) that creates gz using maximum compression (-9) but
214 # does not indicate so in the headers. surprisingly, there are many
216 push @try, [@args, '--quirk', 'buggy-bsd'];
218 # Windows' NTFS gzip implementation; quirk is really really evil
219 # it should be the last test: it can result in a corrupted archive!
220 if ($os == $fconstants{GZIP_OS_NTFS}) {
221 pop @args; pop @args; # ntfs quirk implies NTFS osflag
222 push @try, [@args, '--quirk', 'ntfs'];
225 my $origsize=(stat($orig))[7];
226 my ($bestvariant, $bestsize);
228 foreach my $variant (@try) {
229 doit_redir($tempin, $tempout, 'zgz', @$variant, @extraargs, '-c');
230 if (!comparefiles($orig, $tempout)) {
232 return $name, $timestamp, undef, @$variant;
235 # generate a binary delta and see if this is the
236 # best variant so far
237 my $ret=system("xdelta delta -0 --pristine $tempout $orig $tempdir/tmpdelta 2>/dev/null") >> 8;
238 # xdelta exits 1 on success
240 my $size=(stat("$tempdir/tmpdelta"))[7];
241 if (! defined $bestsize || $size < $bestsize) {
242 $bestvariant = $variant;
244 rename("$tempdir/tmpdelta", "$tempdir/bestdelta") || die "rename: $!";
250 # Nothing worked perfectly, so use the delta that was generated for
252 my $percentover=100 - int (($origsize-$bestsize)/$origsize*100);
253 debug("Using delta to best variant, bloating $percentover%: @$bestvariant");
254 if ($percentover > 10) {
255 print STDERR "warning: pristine-gz cannot reproduce build of $orig; ";
256 if ($percentover >= 100) {
257 print STDERR "storing entire file in delta!\n";
260 print STDERR "storing $percentover% size diff in delta\n";
262 print STDERR "(Please consider filing a bug report so the delta size can be improved.)\n";
264 return $name, $timestamp, "$tempdir/bestdelta", @$bestvariant;
271 my $delta=Pristine::Tar::Delta::read(Tarball => $deltafile);
272 Pristine::Tar::Delta::assert($delta, type => "gz", maxversion => 3,
273 fields => [qw{params filename timestamp}]);
275 my @params=split(' ', $delta->{params});
278 next if /^(--gnu|--rsyncable|--new-rsyncable|-[nmM1-9])$/;
279 if (/^(--original-name|--quirk|--osflag)$/) {
283 die "paranoia check failed on params from delta ($_)";
285 @params=split(' ', $delta->{params});
287 my $filename=$delta->{filename};
288 $filename=~s/^.*\///; # basename isn't strong enough
290 my @zgz=("zgz", @params, "-T", $delta->{timestamp});
291 if (! grep { $_ eq "--original-name" } @params) {
292 push @zgz, "-F", $filename;
296 if (exists $delta->{delta}) {
297 my $tempdir=tempdir();
298 my $tfile="$tempdir/".basename($file).".gz";
299 doit_redir($file, $tfile, @zgz);
300 doit("xdelta", "patch", "--pristine", $delta->{delta}, $tfile, "$file.gz");
303 doit_redir("$file", "$file.gz", @zgz);
305 doit("rm", "-f", $file);
312 my $tempdir=tempdir();
313 my ($filename, $timestamp, $xdelta, @params)=
314 reproducegz($gzfile, $tempdir, "$tempdir/test");
316 Pristine::Tar::Delta::write(Tarball => $deltafile, {
317 version => (defined $xdelta ? "3.0" : "2.0"),
320 filename => basename($filename),
321 timestamp => $timestamp,
322 (defined $xdelta ? (delta => $xdelta) : ()),