#!/usr/bin/perl

=head1 NAME

pristine-tar - regenerate pristine tarballs

=head1 SYNOPSIS

B<pristine-tar> [-vdk] gendelta I<tarball> I<delta>

B<pristine-tar> [-vdk] gentar I<delta> I<tarball>

B<pristine-tar> [-vdk] [-m message] commit I<tarball> [I<upstream>]

B<pristine-tar> [-vdk] checkout I<tarball>

B<pristine-tar> [-vdk] list

=head1 DESCRIPTION

pristine-tar can regenerate an exact copy of a pristine upstream tarball
using only a small binary I<delta> file and the contents of the tarball,
which are typically kept in an I<upstream> branch in version control.

The I<delta> file is designed to be checked into version control along-side
the I<upstream> branch, thus allowing Debian packages to be built entirely
using sources in version control, without the need to keep copies of
upstream tarballs.

pristine-tar supports compressed tarballs, calling out to pristine-gz(1),
pristine-bz2(1), and pristine-xz(1) to produce the pristine gzip, bzip2,
and xz files.

=head1 COMMANDS

=over 4

=item pristine-tar gendelta I<tarball> I<delta>

This takes the specified upstream I<tarball>, and generates a small binary
delta file that can later be used by pristine-tar gentar to recreate the
tarball.

If the delta filename is "-", it is written to standard output.

=item pristine-tar gentar I<delta> I<tarball>

This takes the specified I<delta> file, and the files in the current
directory, which must have identical content to those in the upstream
tarball, and uses these to regenerate the pristine upstream I<tarball>.

If the delta filename is "-", it is read from standard input.

=item pristine-tar commit I<tarball> [I<upstream>]

B<pristine-tar commit> generates a pristine-tar delta file for the specified
I<tarball>, and commits it to version control. The B<pristine-tar checkout>
command can later be used to recreate the original tarball based only
on the information stored in version control.

The I<upstream> parameter specifies the tag or branch that contains the
same content that is present in the tarball. This defaults to
"refs/heads/upstream", or if there's no such branch, any
branch matching "upstream". The name of the tree it points to will be
recorded for later use by B<pristine-tar checkout>. Note that the content
does not need to be 100% identical to the content of the tarball, but
if it is not, additional space will be used in the delta file.

The delta files are stored in a branch named "pristine-tar", with filenames
corresponding to the input tarball, with ".delta" appended. This
branch is created or updated as needed to add each new delta.

=item pristine-tar checkout I<tarball>

This regenerates a copy of the specified I<tarball> using information
previously saved in version control by B<pristine-tar commit>.

=item pristine-tar list

This lists tarballs that pristine-tar is able to checkout from version
control.

=back

=head1 OPTIONS

=over 4

=item -v

=item --verbose

Verbose mode, show each command that is run.

=item -d

=item --debug

Debug mode.

=item -k

=item --keep

Don't clean up the temporary directory on exit.

=item -m message

=item --message=message

Use this option to specify a custom commit message to pristine-tar commit.

=back

=head1 EXAMPLES

Suppose you maintain the hello package, in a git repository. You have
just created a tarball of the release, I<hello-1.0.tar.gz>, which you
will upload to a "forge" site.

You want to ensure that, if the "forge" loses the tarball, you can always
recreate exactly that same tarball. And you'd prefer not to keep copies
of tarballs for every release, as that could use a lot of disk space
when hello gets the background mp3s and user-contributed levels you are
planning for version 2.0.

The solution is to use pristine-tar to commit a delta file that efficiently
stores enough information to reproduce the tarball later.

 cd hello
 git tag -s 1.0
 pristine-tar commit ../hello-1.0.tar.gz 1.0

Remember to tell git to push both the pristine-tar branch, and your tag:

 git push --all --tags

Now it is a year later. The worst has come to pass; the "forge" lost
all its data, you deleted the tarballs to make room for bug report emails,
and you want to regenerate them. Happily, the git repository is still
available.

git clone git://github.com/joeyh/hello.git cd hello pristine-tar checkout ../hello-1.0.tar.gz =head1 LIMITATIONS Only tarballs, gzipped tarballs, bzip2ed tarballs, and xzed tarballs are currently supported. Currently only the git revision control system is supported by the "checkout" and "commit" commands. It's ok if the working copy is not clean or has uncommitted changes, or has changes staged in the index; none of that will be touched by "checkout" or "commit". =head1 ENVIRONMENT =over 4 =item B Specifies a location to place temporary files, other than the default. =back =head1 AUTHOR Joey Hess Licensed under the GPL, version 2 or above. =cut use warnings; use strict; use Pristine::Tar; use Pristine::Tar::Delta; use Pristine::Tar::Formats; use File::Path; use File::Basename; use Cwd qw{getcwd abs_path}; # Force locale to C since tar may output utf-8 filenames differently # depending on the locale. $ENV{LANG}='C'; # Don't let environment change tar's behavior. delete $ENV{TAR_OPTIONS}; delete $ENV{TAPE}; # The following two assignments are potentially munged during the # build process to hold the values of TAR_PROGRAM and XDELTA_PROGRAM # parameters as given to Makefile.PL. 
my $tar_program = "tar";
my $xdelta_program = "xdelta";

# Custom commit message given with -m/--message; stays undef when the option
# is not used, in which case commitdelta builds a default message.
my $message;

# Hand control to the command dispatcher from Pristine::Tar.
# NOTE(review): the optional number after each handler is presumably the
# expected argument count -- confirm against Pristine::Tar::dispatch.
dispatch(
    commands => {
        usage => [\&usage],
        gentar => [\&gentar, 2],
        gendelta => [\&gendelta, 2],
        commit => [\&commit],
        ci => [\&commit, 1],
        checkout => [\&checkout, 1],
        co => [\&checkout, 1],
        list => [\&list, 0],
    },
    options => {
        "m|message=s" => \$message,
    },
);

# Prints the command summary to STDERR and exits with status 1.
sub usage {
    print STDERR "Usage: pristine-tar [-vdk] gendelta tarball delta\n";
    print STDERR " pristine-tar [-vdk] gentar delta tarball\n";
    print STDERR " pristine-tar [-vdk] [-m message] commit tarball [upstream]\n";
    print STDERR " pristine-tar [-vdk] checkout tarball\n";
    # "list" accepts the same global switches as the other commands (see
    # the SYNOPSIS), so advertise them here as well.
    print STDERR " pristine-tar [-vdk] list\n";
    exit 1;
}

# Temporary directory prepared by the most recent recreatetarball() call.
# recreatetarball_helper() reads it, so the longlink variant can re-run tar
# over the same prepared tree.
my $recreatetarball_tempdir;

# Builds a reproducible tarball from a manifest and a source tree.
#
#   $manifestfile  file listing one tarball member per line
#   $source        directory holding the files named in the manifest
#   %options       clobber_source => true to move $source instead of copying
#                  create_missing => true to create a dummy directory for
#                                    manifest entries absent from $source
#
# Returns the path of the generated tarball.  Dies if the manifest cannot
# be read or linked, or if normalising a file's metadata fails.
sub recreatetarball {
    my $manifestfile=shift;
    my $source=shift;
    my %options=@_;

    my $tempdir=tempdir();

    my @manifest;
    open(my $in, "<", $manifestfile) || die "$manifestfile: $!";
    while (my $line=<$in>) {
        chomp $line;
        push @manifest, $line;
    }
    close $in;
    link($manifestfile, "$tempdir/manifest") || die "link $tempdir/manifest: $!";

    # The manifest and source should have the same filenames,
    # but the manifest probably has all the files under a common
    # subdirectory. Check if it does.
    my $subdir="";
    foreach my $file (@manifest) {
        #debug("file: $file");
        if ($file=~m!^(/?[^/]+)(/|$)!) {
            if (length $subdir && $subdir ne $1) {
                debug("found file not in subdir $subdir: $file");
                $subdir="";
                last;
            }
            elsif (! length $subdir) {
                $subdir=$1;
                debug("set subdir to $subdir");
            }
        }
        else {
            debug("found file not in subdir: $file");
            $subdir="";
            last;
        }
    }

    if (length $subdir) {
        debug("subdir is $subdir");
        doit("mkdir", "$tempdir/workdir");
        $subdir="/$subdir";
    }

    if (! $options{clobber_source}) {
        doit("cp", "-a", $source, "$tempdir/workdir$subdir");
    }
    else {
        doit("mv", $source, "$tempdir/workdir$subdir");
    }

    # It's important that this create an identical tarball each time
    # for a given set of input files. So don't include file metadata
    # in the tarball, since it can easily vary.
    my $full_sweep=0;
    foreach my $file (@manifest) {
        my $path="$tempdir/workdir/$file";
        if (-l $path) {
            # Can't set timestamp of a symlink, so
            # replace the symlink with an empty file.
            unlink($path) || die "unlink: $!";
            open(my $out, ">", $path) || die "open: $!";
            close $out;
        }
        elsif (! -e $path) {
            debug("$file is listed in the manifest but may not be present in the source directory");
            $full_sweep=1;

            if ($options{create_missing}) {
                # Avoid tar failing on the nonexistent item by
                # creating a dummy directory.
                debug("creating missing $file");
                mkpath $path;
            }
        }

        if (-d $path && (-u _ || -g _ || -k _)) {
            # tar behaves weirdly for some special modes
            # and ignores --mode, so clear them.
            debug("chmod $file");
            chmod(0755, $path) || die "chmod: $!";
        }
    }

    # Set file times only after modifying of the directory content is
    # done.
    foreach my $file (@manifest) {
        my $path="$tempdir/workdir/$file";
        if (-e $path) {
            utime(0, 0, $path) || die "utime: $file: $!";
        }
    }

    # If some files couldn't be matched up with the manifest,
    # it's possible they do exist, but just with names that make sense
    # to tar, but not to this program. Work around this and make sure
    # such files have their metadata tweaked, by doing a full sweep of
    # the tree.
    if ($full_sweep) {
        debug("doing full tree sweep to catch missing files");
        use File::Find;
        find(sub {
            if (-l $_) {
                unlink($_) || die "unlink: $!";
                open(my $out, ">", $_) || die "open: $!";
                close $out;
            }
            if (-d $_ && (-u _ || -g _ || -k _)) {
                chmod(0755, $_) || die "chmod: $!";
            }
        }, "$tempdir/workdir");
        find(sub {
            utime(0, 0, $_) || die "utime: $_: $!";
        }, "$tempdir/workdir");
    }

    # The first attempt always runs without the longlink workaround.
    delete $ENV{TAR_LONGLINK_100};
    $recreatetarball_tempdir=$tempdir;
    return recreatetarball_helper();
}

# Runs tar over the tree prepared by recreatetarball() with fixed owner,
# group and mode, and returns the path of the tarball it wrote.
sub recreatetarball_helper {
    my $tempdir=$recreatetarball_tempdir;

    my $ret="$tempdir/recreatetarball";
    doit($tar_program, "cf", $ret, "--owner", 0, "--group", 0,
        "--numeric-owner", "-C", "$tempdir/workdir",
        "--no-recursion", "--mode", "0644",
        "--files-from", "$tempdir/manifest");

    return $ret;
}

# Re-runs recreatetarball_helper() with TAR_LONGLINK_100 set and returns
# the resulting tarball path.  recreatetarball() must have run first.
sub recreatetarball_longlink_100 {
    # For a long time, Debian's tar had a patch that made it output
    # larger tar files if a filename was exactly 100 bytes. Now that
    # Debian's tar has been fixed, in order to recreate the tarball
    # created by that version of tar, we rely on an environment
    # variable to turn back on the old behavior.
    #
    # This variable is currently only available in Debian's tar,
    # so users of non-debian tar who want to recreate tarballs from
    # deltas created using the old version of Debian's tar are SOL.

    $ENV{TAR_LONGLINK_100}=1;
    my $ret=recreatetarball_helper();
    delete $ENV{TAR_LONGLINK_100};
    return $ret;
}

# Regenerates $tarball from the delta in $deltafile and the files in the
# current directory.  Extra %opts are passed through to recreatetarball().
# Calls error() when no recreation variant lets xdelta reproduce the
# original, or when the delta carries an unknown wrapper type.
sub gentar {
    my $deltafile=shift;
    my $tarball=shift;
    my %opts=@_;

    my $delta=Pristine::Tar::Delta::read(Tarball => $deltafile);
    Pristine::Tar::Delta::assert($delta, type => "tar", maxversion => 2,
        minversion => 2, fields => [qw{manifest delta}]);

    # With a compression wrapper, the plain tarball is only an
    # intermediate file; otherwise it is the final output.
    my $out=(defined $delta->{wrapper}
        ? tempdir()."/".basename($tarball).".tmp"
        : $tarball);

    # Candidate ways of rebuilding the base tarball, tried in order until
    # xdelta accepts one.
    my @try;
    push @try, sub {
        recreatetarball($delta->{manifest}, getcwd(),
            clobber_source => 0, %opts);
    };
    push @try, \&recreatetarball_longlink_100;

    my $ok;
    foreach my $variant (@try) {
        my $recreatetarball=$variant->();
        my $ret=try_doit($xdelta_program, "patch", $delta->{delta},
            $recreatetarball, $out);
        if ($ret == 0) {
            $ok=1;
            last;
        }
    }
    if (! $ok) {
        error("Failed to reproduce original tarball. Please file a bug report.");
    }

    if (defined $delta->{wrapper}) {
        my $delta_wrapper=Pristine::Tar::Delta::read(Tarball => $delta->{wrapper});
        if (grep { $_ eq $delta_wrapper->{type} } qw{gz bz2 xz}) {
            doit("pristine-".$delta_wrapper->{type},
                ($verbose ? "-v" : "--no-verbose"),
                ($debug ? "-d" : "--no-debug"),
                ($keep ? "-k" : "--no-keep"),
                "gen".$delta_wrapper->{type},
                $delta->{wrapper}, $out);
            doit("mv", "-f", $out.".".$delta_wrapper->{type}, $tarball);
        }
        else {
            error("unknown wrapper file type: ".
                $delta_wrapper->{type});
        }
    }
}

# Writes the member list of $tarball to the file $manifest, one name per
# line, with any leading "./" or "/" removed and empty names dropped.
sub genmanifest {
    my $tarball=shift;
    my $manifest=shift;

    # List-form pipe open: the tarball name never passes through a shell,
    # and the configured tar program is used like everywhere else.
    open(my $in, "-|", $tar_program, "tf", $tarball) || die "tar tf: $!";
    open(my $out, ">", $manifest) || die "$manifest: $!";
    while (my $entry=<$in>) {
        chomp $entry;
        # ./ or / in the manifest just confuses tar
        $entry=~s/^\.?\/+//;
        print {$out} "$entry\n" if length $entry;
    }
    # tar's exit status is deliberately not checked: a tarball tar cannot
    # list yields an empty manifest, and gendelta then reports the
    # oversized delta with a more helpful message.
    close $in;
    close($out) || die "$manifest: $!";
}

# Decompresses $tarball into $dest when it is gzip, bzip2 or xz compressed.
# Returns the compression type ('gz', 'bz2' or 'xz'); returns undef, and
# writes nothing, when none of the format checks match.
sub _uncompress_tarball {
    my $tarball=shift;
    my $dest=shift;

    my ($type, $cat);
    if (is_gz($tarball)) {
        ($type, $cat)=('gz', 'zcat');
    }
    elsif (is_bz2($tarball)) {
        ($type, $cat)=('bz2', 'bzcat');
    }
    elsif (is_xz($tarball)) {
        ($type, $cat)=('xz', 'xzcat');
    }
    else {
        return;
    }

    open(my $in, "-|", $cat, $tarball) || die "$cat: $!";
    open(my $out, ">", $dest) || die "$dest: $!";
    while (my $chunk=<$in>) {
        print {$out} $chunk;
    }
    close($in) || die "$cat: $!";
    close($out) || die "$dest: $!";
    return $type;
}

# Generates a delta file for $tarball and writes it to $deltafile.
#
#   %opts  recreatetarball => path of an already recreated tarball to diff
#          against; when absent, the tarball is unpacked and recreated here.
#
# Exits with status 1 when the delta is not smaller than the tarball, and
# calls error() when xdelta fails.
sub gendelta {
    my $tarball=shift;
    my $deltafile=shift;
    my %opts=@_;

    my $tempdir=tempdir();
    my %delta;

    # Check to see if it's compressed, and get uncompressed tarball.
    my $compression=_uncompress_tarball($tarball, "$tempdir/origtarball");

    # Generate a wrapper file to recreate the compressed file.
    if (defined $compression) {
        $delta{wrapper}="$tempdir/wrapper";
        doit("pristine-$compression",
            ($verbose ? "-v" : "--no-verbose"),
            ($debug ? "-d" : "--no-debug"),
            ($keep ? "-k" : "--no-keep"),
            "gendelta", $tarball, $delta{wrapper});
        $tarball="$tempdir/origtarball";
    }

    $delta{manifest}="$tempdir/manifest";
    genmanifest($tarball, $delta{manifest});

    my $recreatetarball;
    if (! exists $opts{recreatetarball}) {
        my $sourcedir="$tempdir/tmp";
        doit("mkdir", $sourcedir);
        doit($tar_program, "xf", File::Spec->rel2abs($tarball), "-C", $sourcedir);
        # if all files were in a subdir, use the subdir as the sourcedir
        my @out=grep { $_ ne "$sourcedir/.." && $_ ne "$sourcedir/." }
            (glob("$sourcedir/*"), glob("$sourcedir/.*"));
        if ($#out == 0 && -d $out[0]) {
            $sourcedir=$out[0];
        }
        $recreatetarball=recreatetarball("$tempdir/manifest", $sourcedir,
            clobber_source => 1);
    }
    else {
        $recreatetarball=$opts{recreatetarball};
    }

    $delta{delta}="$tempdir/delta";
    # List-form system keeps the file names away from the shell.
    my $ret=system($xdelta_program, "delta", "-0", "--pristine",
        $recreatetarball, $tarball, $delta{delta}) >> 8;
    # xdelta exits 1 on success if there were differences
    if ($ret != 1 && $ret != 0) {
        error("xdelta failed with return code $ret");
    }

    if ((-s $delta{delta}) >= (-s $tarball)) {
        print STDERR "error: excessively large binary delta for $tarball\n";
        if (! defined $compression) {
            print STDERR "(Probably the tarball is compressed with an unsupported form of compression.)\n";
        }
        else {
            print STDERR "(Please consider filing a bug report.)\n";
        }
        exit 1;
    }

    Pristine::Tar::Delta::write(Tarball => $deltafile, {
        version => 2,
        type => 'tar',
        %delta,
    });
}

# Returns 'git' when the current directory has a .git entry or GIT_DIR is
# set to a non-empty value; otherwise calls error().
sub vcstype {
    if (-e ".git" ||
        (exists $ENV{GIT_DIR} && length $ENV{GIT_DIR})) {
        return 'git';
    }
    else {
        error("cannot determine type of vcs used for the current directory");
    }
}

# Exports the tree named by $upstream (a ref name or an id; defaults to
# 'upstream') into a fresh temporary directory.
# Returns ($directory, $id), where $id is the tree id when git can resolve
# one and the commit id otherwise.
sub export {
    my $upstream=shift;

    my $dest=tempdir();
    my $id;

    my $vcs=vcstype();
    if ($vcs eq "git") {
        # NOTE(review): this match is unanchored, so any name containing 40
        # consecutive alphanumerics is taken as an id -- left as is.
        if (defined $upstream && $upstream =~ /[A-Za-z0-9]{40}/) {
            $id=$upstream;
        }
        else {
            if (! defined $upstream) {
                $upstream='upstream';
            }

            my @reflines=map { chomp; $_ } `git show-ref \Q$upstream\E`;
            if (! @reflines) {
                error("failed to find ref using: git show-ref $upstream");
            }

            # if one line's ref matches exactly, use it
            foreach my $line (@reflines) {
                my ($refname)=$line=~/^[A-Za-z0-9]+\s(.*)/;
                # Skip output that does not look like "<id> <ref>".
                next unless defined $refname;
                if ($refname eq $upstream || $refname eq "refs/heads/$upstream") {
                    ($id)=$line=~/^([A-Za-z0-9]+)\s/;
                    last;
                }
            }

            if (! defined $id) {
                if (@reflines == 1) {
                    ($id)=$reflines[0]=~/^([A-Za-z0-9]+)\s/;
                }
                else {
                    error("more than one ref matches \"$upstream\":\n".
                        join("\n", @reflines));
                }
            }
        }

        # We have an id that is probably a commit. Let's get to the
        # id of the actual tree instead. This makes us more robust
        # against any later changes to the commit.
        my $treeid=`git rev-parse '$id^{tree}'`;
        chomp $treeid;
        $id = $treeid if length $treeid;

        doit("git archive --format=tar \Q$id\E | (cd '$dest' && tar x)");
    }
    else {
        die "unsupported vcs $vcs";
    }

    return ($dest, $id);
}

sub git_findbranch {
    # Looks for a branch with the given name. If a local branch exists,
    # returns it. Otherwise, looks for a remote branch, and if exactly
    # one exists, returns that. If there's no such branch at all, returns
    # undef. Finally, if there are multiple remote branches and no
    # local branch, fails with an error.
    my $branch=shift;

    my @reflines=split(/\n/, `git show-ref \Q$branch\E`);
    my @remotes=grep { ! m/ refs\/heads\/\Q$branch\E$/ } @reflines;
    if ($#reflines != $#remotes) {
        # At least one line was the local branch.
        return $branch;
    }
    else {
        if (@reflines == 0) {
            return;
        }
        elsif (@remotes == 1) {
            my ($remote_branch)=$remotes[0]=~/^[A-Za-z0-9]+\s(.*)/;
            return $remote_branch;
        }
        else {
            error("There's no local $branch branch. Several remote $branch branches exist.\n".
                "Run \"git branch --track $branch <remote>\" to create a local $branch branch\n".
                join("\n", @remotes));
        }
    }
}

# Reads the stored delta and tree id for $tarball from the pristine-tar
# branch, using a remote branch when no local one exists.
# Returns ($delta, $id); calls error() when the branch or either file is
# missing or empty.
sub checkoutdelta {
    my $tarball=shift;

    my $branch="pristine-tar";
    my $deltafile=basename($tarball).".delta";
    my $idfile=basename($tarball).".id";

    my ($delta, $id);

    my $vcs=vcstype();
    if ($vcs eq "git") {
        my $found=git_findbranch($branch);
        if (! defined $found) {
            error("no $branch branch found, use \"pristine-tar commit\" first");
        }
        elsif ($found eq $branch) {
            $branch="refs/heads/$branch";
        }
        else {
            # use remote branch
            $branch=$found;
        }

        $delta=`git show $branch:\Q$deltafile\E`;
        if ($?) {
            error("git show $branch:$deltafile failed");
        }
        if (! length $delta) {
            error("git show $branch:$deltafile returned no content");
        }
        $id=`git show $branch:\Q$idfile\E`;
        if ($?) {
            error("git show $branch:$idfile failed");
        }
        chomp $id;
        if (! length $id) {
            error("git show $branch:$idfile returned no id");
        }
    }
    else {
        die "unsupported vcs $vcs";
    }

    return ($delta, $id);
}

# Commits $delta and $id to the pristine-tar branch as
# "<tarball basename>.delta" and "<tarball basename>.id".
# The commit message is the -m/--message value when given, otherwise
# "pristine-tar data for <tarball basename>".
sub commitdelta {
    my $delta=shift;
    my $id=shift;
    my $tarball=shift;

    my $branch="pristine-tar";
    my $deltafile=basename($tarball).".delta";
    my $idfile=basename($tarball).".id";
    my $commit_message=defined $message ? $message :
        "pristine-tar data for ".basename($tarball);

    my $vcs=vcstype();
    if ($vcs eq "git") {
        my $tempdir=tempdir();
        open(my $deltaout, ">", "$tempdir/$deltafile") || die "$tempdir/$deltafile: $!";
        print {$deltaout} $delta;
        close($deltaout) || die "$tempdir/$deltafile: $!";
        open(my $idout, ">", "$tempdir/$idfile") || die "$tempdir/$idfile: $!";
        print {$idout} "$id\n";
        close($idout) || die "$tempdir/$idfile: $!";

        # Commit the delta to a branch in git without affecting the
        # index, and without touching the working tree. Aka deep
        # git magick.
        $ENV{GIT_INDEX_FILE}="$tempdir/index";
        if (! exists $ENV{GIT_DIR} || ! length $ENV{GIT_DIR}) {
            $ENV{GIT_DIR}=getcwd()."/.git";
        }
        else {
            $ENV{GIT_DIR}=abs_path($ENV{GIT_DIR});
        }
        chdir($tempdir) || die "chdir: $!";

        # If there's no local branch, branch from a remote branch
        # if one exists. If there's no remote branch either, the
        # code below will create the local branch.
        my $found=git_findbranch($branch);
        if (defined $found && $found ne $branch) {
            doit("git branch --track \Q$branch\E \Q$found\E");
        }

        my $branch_exists=(system("git show-ref --quiet --verify refs/heads/$branch") == 0);
        if ($branch_exists) {
            doit("git ls-tree -r --full-name $branch | git update-index --index-info");
        }
        doit("git", "update-index", "--add", $deltafile, $idfile);
        my $sha=`git write-tree`;
        if ($?) {
            error("git write-tree failed");
        }
        $sha=~s/\n//sg;
        if (! length $sha) {
            error("git write-tree did not return a sha");
        }
        # Fork a child whose STDIN is the pipe the parent writes the commit
        # message to; the git commit-tree it runs inherits that STDIN.
        my $pid = open(my $commit_pipe, "|-");
        # undef means the fork failed; without this check the parent would
        # carry on as if it were the child.
        die "fork: $!" if ! defined $pid;
        if (! $pid) {
            # child
            my $commitopts=$branch_exists ? "-p $branch" : "";
            my $commitsha=`git commit-tree $sha $commitopts`;
            if ($?) {
                error("git commit-tree failed");
            }
            $commitsha=~s/\n//sg;
            if (! length $commitsha) {
                error("git commit-tree did not return a sha");
            }
            doit("git", "update-ref", "refs/heads/$branch", $commitsha);
            exit 0;
        }
        else {
            # parent
            print {$commit_pipe} $commit_message."\n";
            close($commit_pipe) || error("git commit-tree failed");
        }

        message("committed $deltafile to branch $branch");
    }
    else {
        die "unsupported vcs $vcs";
    }
}

# "commit" command: generates a delta for $tarball against the exported
# upstream tree and stores it on the pristine-tar branch.
# Arguments: tarball, then an optional upstream ref; anything else prints
# the usage message.
sub commit {
    my $tarball=shift;
    my $upstream=shift; # optional

    if (! defined $tarball || @_) {
        usage();
    }

    my $tempdir=tempdir();
    my ($sourcedir, $id)=export($upstream);
    genmanifest($tarball, "$tempdir/manifest");
    my $recreatetarball=recreatetarball("$tempdir/manifest", $sourcedir,
        clobber_source => 1, create_missing => 1);
    # The child writes the delta to its STDOUT ("-"); the parent reads it
    # back through the pipe.
    my $pid = open(my $gendelta_pipe, "-|");
    die "fork: $!" if ! defined $pid;
    if (! $pid) {
        # child
        gendelta($tarball, "-", recreatetarball => $recreatetarball);
        exit 0;
    }
    # Slurp the whole delta in one read.
    local $/=undef;
    my $delta=<$gendelta_pipe>;
    close($gendelta_pipe) || error("failed to generate delta");

    commitdelta($delta, $id, $tarball);
}

# "checkout" command: regenerates $tarball from the delta and tree id
# stored on the pristine-tar branch.
sub checkout {
    my $tarball=shift;

    my ($delta, $id)=checkoutdelta($tarball);
    my ($sourcedir, undef)=export($id);
    # The child reads the delta from its STDIN ("-"), which is the pipe the
    # parent writes to.
    my $pid = open(my $gentar_pipe, "|-");
    die "fork: $!" if ! defined $pid;
    if (! $pid) {
        # child
        # Resolve the output path before leaving the current directory.
        $tarball=abs_path($tarball);
        chdir($sourcedir) || die "chdir $sourcedir: $!";
        gentar("-", $tarball, clobber_source => 1, create_missing => 1);
        exit 0;
    }
    print {$gentar_pipe} $delta;
    close($gentar_pipe) || error("failed to generate tarball");

    message("successfully generated $tarball");
}

# "list" command: prints the name of every tarball that has a .delta file
# on the pristine-tar branch, one per line.  Prints nothing when there is
# no such branch.
sub list {
    my $branch="pristine-tar";
    my $vcs=vcstype();
    if ($vcs eq "git") {
        my $found=git_findbranch($branch);
        if (defined $found) {
            open(my $listing, "-|", "git", "ls-tree", $found, "--name-only")
                || die "git ls-tree: $!";
            while (my $name=<$listing>) {
                chomp $name;
                next unless $name=~s/\.delta$//;
                print $name."\n";
            }
            close $listing;
        }
    }
    else {
        # Belongs to the vcs check: a missing pristine-tar branch is not an
        # unsupported vcs.
        die "unsupported vcs $vcs";
    }
}