5 pristine-xz - regenerate pristine xz files
9 B<pristine-xz> [-vdk] gendelta I<file.xz> I<delta>
11 B<pristine-xz> [-vdk] genxz I<delta> I<file>
15 This is a complement to the pristine-tar(1) command. Normally you
16 don't need to run it by hand, since pristine-tar calls it as necessary
17 to handle .tar.xz files.
19 pristine-xz gendelta takes the specified I<xz> file, and generates a
20 small binary I<delta> file that can later be used by pristine-xz genxz
21 to recreate the original file.
23 pristine-xz genxz takes the specified I<delta> file, and compresses the
24 specified input I<file> (which must be identical to the contents of the
25 original xz file). The resulting file will be identical to
26 the original gz file used to create the delta.
28 The approach used to regenerate the original xz file is to figure out
29 how it was produced -- what compression level was used, etc. Currently
30 support is poor for xz files produced with unusual compression options.
32 If the delta filename is "-", pristine-xz reads or writes it to stdio.
40 Verbose mode, show each command that is run.
48 Don't clean up the temporary directory on exit.
52 Try harder to determine how to generate deltas of difficult xz files.
62 Specifies a location to place temporary files, other than the default.
68 Joey Hess <joeyh@debian.org>,
69 Faidon Liambotis <paravoid@debian.org>,
70 Cyril Brulebois <cyril.brulebois@enst-bretagne.fr>
72 Licensed under the GPL, version 2.
79 use Pristine::Tar::Delta;
80 use Pristine::Tar::Formats;
81 use File::Basename qw/basename/;
84 my @supported_xz_programs = qw(xz);
91 genxz => [\&genxz, 2],
92 gendelta => [\&gendelta, 2],
100 print STDERR "Usage: pristine-xz [-vdkt] gendelta file.xz delta\n";
101 print STDERR " pristine-xz [-vdkt] genxz delta file\n";
105 my ($hash, $labels, $fields) = @_;
106 @$hash{@$labels} = @$fields[1..scalar(@$labels)];
109 sub scan_xz_lvv_robot {
111 # We need at least version 5.0 to get a proper '-lvv --robot'
113 my $cmd = "xz -lvv --robot $filename";
114 my $ret = open (my $in, "$cmd |") || die "$cmd failed: $!";
115 my %xz = (file => {}, stream => {}, blocks => [],
116 summary => {}, totals => {});
117 my (%file, %stream, @blocks, %summary, %totals);
118 my @file_labels = qw{nb_streams nb_blocks compressed uncompressed
119 ratio checks padding_size};
121 qw{stream_num nb_blocks compressed_offset uncompressed_offset
122 compressed_size uncompressed_size ratio check_name
125 qw{stream_num block_in_stream block_in_file compressed_offset
126 uncompressed_offset compressed_size uncompressed_size ratio
127 check_name check_value header_size size_present_flags
128 actual_compressed_size uncompress_memory filter_chain};
129 my @summary_labels = qw{uncompressed_memory size_in_blocks};
131 qw{nb_streams nb_blocks compressed_size uncompressed_size ratio
132 check_names padding_size nb_files uncompressed_memory
135 while (my $line = <$in>) {
137 my @fields = split(/\t/, $line);
138 if ($fields[0] eq 'name') {
141 if ($fields[0] eq 'file') {
142 assign_fields($xz{file}, \@file_labels, \@fields);
145 if ($fields[0] eq 'stream') {
146 assign_fields($xz{stream}, \@stream_labels, \@fields);
149 if ($fields[0] eq 'block') {
151 assign_fields(\%block, \@block_labels, \@fields);
152 push @{$xz{blocks}}, \%block;
155 if ($fields[0] eq 'summary') {
156 assign_fields($xz{summary}, \@summary_labels, \@fields);
159 if ($fields[0] eq 'totals') {
160 assign_fields($xz{totals}, \@totals_labels, \@fields);
168 sub predict_xz_args {
171 my $block_list = undef;
172 my $blocks = $xz->{blocks};
173 if (scalar(@$blocks)) {
174 # There is at least one block. We assume the same compression
175 # level for all blocks
176 my $block = $blocks->[0];
177 my @filters = split(/,/, $block->{filter_chain});
178 if (scalar(@filters) != 1 || $filters[0] !~ /^--lzma2=/) {
179 die "Only LZMA2 is supported";
181 # Deduce the presets from the dict size
182 if ($filters[0] =~ /--lzma2=dict=(.*)/) {
184 my %lzma2_presets_from_dict_size_of =
188 '4MiB' => ['4', '3'],
189 # Put 6 before 5 as it's the default and is
190 # more likely to be right
191 '8MiB' => ['6', '5'],
196 $presets = $lzma2_presets_from_dict_size_of{$dict_size};
197 die "Unkown dict size: $dict_size\n"
198 if (!defined($presets));
200 if (scalar(@$blocks) > 1) {
201 # Gather the block uncompressed sizes
202 $block_list = join(',', map {$_->{uncompressed_size}}
210 'SHA-256' => 'sha256',
212 my $check_name = $xz->{stream}->{check_name};
213 my $check_kwd = $check_kwd_of{$check_name};
214 die "Unknown xz check: $check_name\n" if (!defined($check_kwd));
216 my $possible_args = [];
217 my $common = ["--check=$check_kwd", "-z"];
218 if (defined($block_list)) {
219 unshift @$common, "--block-list=$block_list";
221 foreach my $preset (@$presets) {
222 push @$possible_args, [@$common, "-$preset"];
223 push @$possible_args, [@$common, "-${preset}e"];
225 return $possible_args;
229 my $filename = shift;
231 if (! is_xz($filename)) {
232 error "This is not a valid xz archive.";
235 # This will guess the compression level, check and blocks from the file.
236 # More info is still needed if the level used was 3/4 or 5/6 (see
237 # lzma2_presets_from_dict_size_of in predict_xz_args) or if --extreme
238 # was used. We output possible args for each combination in this case.
239 my $xz = scan_xz_lvv_robot($filename);
240 my $possible_args = predict_xz_args($xz);
241 return $possible_args;
244 sub predictxzlevels {
245 my $filename = shift;
247 if (! is_xz($filename)) {
248 error "This is not a valid xz archive.";
251 # XXX We don't currently have a way to guess the level from the
252 # file format, as this level only presets several other tunables.
253 # Correct handling would involve finding as many preset values as
254 # possible, and reconstructing the compression level from that.
256 # So far in the wild only these levels have been seen.
257 # (Note that level 9 can use a lot of memory.)
258 my $possible_levels = ["6", "9", "0", "6e", "9e", "0e"];
260 return ($possible_levels);
264 my ($possible_levels, $program) = @_;
267 foreach my $level (@$possible_levels) {
268 push @args, ["-z", "-$level"];
269 push @args, ["-z", "-$level", "--check=crc32"];
270 push @args, ["-z", "-$level", "--check=sha256"];
276 my ($old, $tmpin, $xz_program, @args) = @_;
278 my $new=$tmpin.'.xz';
281 # Note that file name, mode, mtime do not matter to xz.
283 # try xz'ing with the arguments passed
284 doit_redir($tmpin, $new, $xz_program, @args);
287 die("$xz_program failed, aborting");
290 # and compare the generated with the original
291 return !comparefiles($old, $new);
299 my $tmpin="$wd/test";
300 doit_redir($orig, $tmpin, "xz", "-dc");
302 # read fields from xz headers
305 $possible_args = readxz($orig);
307 # If we get an error we fallback to guessing, otherwise, we should
308 # succeed with one of the proposed combinations
310 foreach my $program (@supported_xz_programs) {
311 foreach my $args (@$possible_args) {
312 testvariant($orig, $tmpin, $program, @$args)
313 && return $program, @$args;
318 # Fallback to guessing
319 my ($possible_levels) = predictxzlevels($orig);
321 foreach my $program (@supported_xz_programs) {
322 # try to guess the xz arguments that are needed
323 foreach my $args (predictxzargs($possible_levels,
325 testvariant($orig, $tmpin, $program, @$args)
326 && return $program, @$args;
331 print STDERR "pristine-xz failed to reproduce build of $orig\n";
332 print STDERR "(Please file a bug report.)\n";
340 my $delta=Pristine::Tar::Delta::read(Tarball => $deltafile);
341 Pristine::Tar::Delta::assert($delta, type => "xz", maxversion => 2,
342 fields => [qw{params program}]);
344 my @params=split(' ', $delta->{params});
346 my $param=shift @params;
348 next if $param=~/^(-[0-9]e?)$/;
349 next if $param eq '-z';
350 next if $param eq '--check=none';
351 next if $param eq '--check=crc32';
352 next if $param eq '--check=crc64';
353 next if $param eq '--check=sha256';
354 next if $param=~/^(--block-list=[0-9,]+)$/;
355 die "paranoia check failed on params from delta ($param)";
357 @params=split(' ', $delta->{params});
359 my $program=$delta->{program};
360 if (! grep { $program eq $_ } @supported_xz_programs) {
361 die "paranoia check failed on program from delta ($program)";
364 doit($program, @params, $file);
371 my ($program, @params) = reproducexz($xzfile);
373 Pristine::Tar::Delta::write(Tarball => $deltafile, {