2 # Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
17 """A Cygwin aware version compress/extract object.
This module supports creating and unpacking a tarfile on all platforms. For
Cygwin, Mac, and Linux, it will use the standard tarfile implementation. For
Win32 it will detect Cygwin style symlinks as it archives and convert them to
symlinks in the tarfile.
24 For Win32, it is unfortunate that os.stat does not return a FileID in the ino
25 field which would allow us to correctly determine which files are hardlinks, so
26 instead we assume that any files in the archive that are an exact match are
27 hardlinks to the same data.
29 We know they are not Symlinks because we are using Cygwin style symlinks only,
which appear to Win32 as a normal file.
32 All paths stored and retrieved from a TAR file are expected to be POSIX style,
33 Win32 style paths will be rejected.
All paths represented by the tarfile and all API functions are POSIX style paths
37 except for CygTar.Add which assumes a Native path.
def ToNativePath(native_path):
    """Convert a '/'-separated path to the current platform's native form.

    On win32 every forward slash is replaced with a backslash. On every
    other platform the path is returned unchanged.

    Args:
      native_path: Path string that may use '/' as its separator.

    Returns:
      The path string with separators suitable for the running platform.
    """
    if sys.platform == 'win32':
        return native_path.replace('/', '\\')
    # Non-Windows platforms already use '/', so hand the path back as-is
    # instead of falling off the end of the function and returning None.
    return native_path
def IsCygwinSymlink(symtext):
    """Report whether symtext starts with the Cygwin symlink signature.

    The signature is the text '!<symlink>' immediately followed by the two
    characters 0xFF and 0xFE, twelve characters in total.
    """
    signature = '!<symlink>\xff\xfe'
    leading = symtext[:len(signature)]
    return leading == signature
def SymDatToPath(symtext):
    """Extract the link target from Cygwin style symlink data.

    The first twelve characters (the symlink signature) are discarded and
    every NUL character in the remainder is removed.
    """
    payload = symtext[12:]
    return payload.replace('\x00', '')
def PathToSymDat(filepath):
    """Convert a filepath to Cygwin style symlink data.

    The result is the symlink signature, then each character of filepath
    followed by a NUL, then a terminator.

    Args:
      filepath: The link target as a string.

    Returns:
      The string to store in the symlink file.
    """
    symtag = '!<symlink>\xff\xfe'
    unipath = ''.join(ch + '\x00' for ch in filepath)
    # Each character above occupies two positions (char + NUL), so the end
    # of the string is marked with a NUL of the same width.
    strterm = '\x00\x00'
    return symtag + unipath + strterm
66 def CreateCygwinSymlink(filepath, target):
67 """Create a Cygwin 1.7 style link
69 Generates a Cygwin style symlink by creating a SYSTEM tagged
70 file with the !<link> marker followed by a unicode path.
72 # If we failed to create a symlink, then just copy it. We wrap this in a
73 # retry for Windows which often has stale file lock issues.
74 for cnt in range(1,4):
76 lnk = open(filepath, 'wb')
77 lnk.write(PathToSymDat(target))
80 except EnvironmentError:
81 print 'Try %d: Failed open %s -> %s\n' % (cnt, filepath, target)
83 # Verify the file was created
84 if not os.path.isfile(filepath):
85 print 'Try %d: Failed create %s -> %s\n' % (cnt, filepath, target)
89 # Now set the system attribute bit so that Cygwin knows it's a link.
90 for cnt in range(1,4):
92 return subprocess.call(['cmd', '/C', 'C:\\Windows\\System32\\attrib.exe',
93 '+S', ToNativePath(filepath)])
94 except EnvironmentError:
95 print 'Try %d: Failed attrib %s -> %s\n' % (cnt, filepath, target)
100 def CreateWin32Hardlink(filepath, targpath, try_mklink):
101 """Create a hardlink on Win32 if possible
103 Uses mklink to create a hardlink if possible. On failure, it will
assume mklink is unavailable and copy the file instead, returning False
105 to indicate future calls should not attempt to use mklink."""
107 # Assume an error, if subprocess succeeds, then it should return 0
110 dst_src = ToNativePath(filepath) + ' ' + ToNativePath(targpath)
112 err = subprocess.call(['cmd', '/C', 'mklink /H ' + dst_src],
113 stdout = open(os.devnull, 'wb'))
114 except EnvironmentError:
117 # If we failed to create a hardlink, then just copy it. We wrap this in a
118 # retry for Windows which often has stale file lock issues.
119 if err or not os.path.isfile(filepath):
120 for cnt in range(1,4):
122 shutil.copyfile(targpath, filepath)
124 except EnvironmentError:
125 print 'Try %d: Failed hardlink %s -> %s\n' % (cnt, filepath, targpath)
def ComputeFileHash(filepath):
    """Generate a sha1 hash for the file at the given path.

    The file is read in fixed-size chunks so that hashing a large file does
    not require holding its entire contents in memory.

    Args:
      filepath: Path of the file to hash.

    Returns:
      The hexadecimal sha1 digest of the file's contents.

    Raises:
      EnvironmentError: If the file cannot be opened or read.
    """
    sha1 = hashlib.sha1()
    with open(filepath, 'rb') as fp:
        # iter() with a sentinel keeps calling read() until it returns the
        # empty string that marks end of file.
        for chunk in iter(lambda: fp.read(1024 * 1024), b''):
            sha1.update(chunk)
    return sha1.hexdigest()
138 def ReadableSizeOf(num):
139 """Convert to a human readable number."""
141 return '[%5dB]' % num
142 for x in ['B','K','M','G','T']:
144 return '[%5.1f%s]' % (num, x)
146 return '[%dT]' % int(num)
149 class CygTar(object):
150 """ CygTar is an object which represents a Win32 and Cygwin aware tarball."""
151 def __init__(self, filename, mode='r', verbose=False):
153 self.file_hashes = {}
154 # Set errorlevel=1 so that fatal errors actually raise!
156 self.read_file = open(filename, 'rb')
157 self.read_filesize = os.path.getsize(filename)
158 self.tar = tarfile.open(mode=mode, fileobj=self.read_file, errorlevel=1)
160 self.read_file = None
161 self.read_filesize = 0
162 self.tar = tarfile.open(filename, mode=mode, errorlevel=1)
163 self.verbose = verbose
165 def __DumpInfo(self, tarinfo):
166 """Prints information on a single object in the tarball."""
171 lnk = '-> ' + tarinfo.linkname
174 lnk = '-> ' + tarinfo.linkname
179 reable_size = ReadableSizeOf(tarinfo.size)
180 print '%s %s : %s %s' % (reable_size, typeinfo, tarinfo.name, lnk)
183 def __AddFile(self, tarinfo, fileobj=None):
184 """Add a file to the archive."""
186 self.__DumpInfo(tarinfo)
187 self.tar.addfile(tarinfo, fileobj)
189 def __AddLink(self, tarinfo, linktype, linkpath):
190 """Add a Win32 symlink or hardlink to the archive."""
191 tarinfo.linkname = linkpath
192 tarinfo.type = linktype
194 self.__AddFile(tarinfo)
196 def Add(self, filepath, prefix=None):
197 """Add path filepath to the archive which may be Native style.
199 Add files individually recursing on directories. For POSIX we use
200 tarfile.addfile directly on symlinks and hardlinks. For files, we
201 must check if they are duplicates which we convert to hardlinks
or Cygwin style symlinks which we convert from a file to a symlink
203 in the tarfile. All other files are added as a standard file.
206 # At this point tarinfo.name will contain a POSIX style path regardless
207 # of the original filepath.
208 tarinfo = self.tar.gettarinfo(filepath)
210 tarinfo.name = posixpath.join(prefix, tarinfo.name)
212 if sys.platform == 'win32':
213 # On win32 os.stat() always claims that files are world writable
214 # which means that unless we remove this bit here we end up with
215 # world writables files in the archive, which is almost certainly
217 tarinfo.mode &= ~stat.S_IWOTH
218 tarinfo.mode &= ~stat.S_IWGRP
220 # If we want cygwin to be able to extract this archive and use
221 # executables and dll files we need to mark all the archive members as
222 # executable. This is essentially what happens anyway when the
223 # archive is extracted on win32.
224 tarinfo.mode |= stat.S_IXUSR | stat.S_IXOTH | stat.S_IXGRP
226 # If this a symlink or hardlink, add it
227 if tarinfo.issym() or tarinfo.islnk():
229 self.__AddFile(tarinfo)
232 # If it's a directory, then you want to recurse into it
234 self.__AddFile(tarinfo)
235 native_files = glob.glob(os.path.join(filepath, '*'))
236 for native_file in native_files:
237 if not self.Add(native_file, prefix): return False
240 # At this point we only allow addition of "FILES"
241 if not tarinfo.isfile():
242 print 'Failed to add non real file: %s' % filepath
245 # Now check if it is a Cygwin style link disguised as a file.
246 # We go ahead and check on all platforms just in case we are tar'ing a
247 # mount shared with windows.
248 if tarinfo.size <= 524:
249 with open(filepath) as fp:
251 if IsCygwinSymlink(symtext):
252 self.__AddLink(tarinfo, tarfile.SYMTYPE, SymDatToPath(symtext))
255 # Otherwise, check if its a hardlink by seeing if it matches any unique
256 # hash within the list of hashed files for that file size.
257 nodelist = self.size_map.get(tarinfo.size, [])
259 # If that size bucket is empty, add this file, no need to get the hash until
260 # we get a bucket collision for the first time..
262 self.size_map[tarinfo.size] = [filepath]
263 with open(filepath, 'rb') as fp:
264 self.__AddFile(tarinfo, fp)
267 # If the size collides with anything, we'll need to check hashes. We assume
268 # no hash collisions for SHA1 on a given bucket, since the number of files
269 # in a bucket over possible SHA1 values is near zero.
270 newhash = ComputeFileHash(filepath)
271 self.file_hashes[filepath] = newhash
273 for oldname in nodelist:
274 oldhash = self.file_hashes.get(oldname, None)
276 oldhash = ComputeFileHash(oldname)
277 self.file_hashes[oldname] = oldhash
279 if oldhash == newhash:
280 self.__AddLink(tarinfo, tarfile.LNKTYPE, oldname)
283 # Otherwise, we missed, so add it to the bucket for this size
284 self.size_map[tarinfo.size].append(filepath)
285 with open(filepath, 'rb') as fp:
286 self.__AddFile(tarinfo, fp)
290 """Extract the tarfile to the current directory."""
294 sys.stdout.write('|' + ('-' * 48) + '|\n')
300 cnt = self.read_file.tell()
301 curdots = cnt * 50 / self.read_filesize
302 if dots_outputted < curdots:
303 for dot in xrange(dots_outputted, curdots):
304 sys.stdout.write('.')
306 dots_outputted = curdots
308 # For symlinks in Windows we create Cygwin 1.7 style symlinks since the
309 # toolchain is Cygwin based. For hardlinks on Windows, we use mklink if
310 # possible to create a hardlink. For all other tar items, or platforms we
311 # go ahead and extract it normally.
312 if m.issym() and sys.platform == 'win32':
313 CreateCygwinSymlink(m.name, m.linkname)
314 # For hardlinks in Windows, we try to use mklink, and instead copy on
316 elif m.islnk() and sys.platform == 'win32':
317 try_mklink = CreateWin32Hardlink(m.name, m.linkname, try_mklink)
318 # Otherwise, extract normally.
322 sys.stdout.write('\n')
326 """List the set of objects in the tarball."""
327 for tarinfo in self.tar:
328 self.__DumpInfo(tarinfo)
332 if self.read_file is not None:
333 self.read_file.close()
334 self.read_file = None
335 self.read_filesize = 0
339 parser = optparse.OptionParser()
341 parser.add_option('-c', '--create', help='Create a tarball.',
342 action='store_const', const='c', dest='action', default='')
343 parser.add_option('-x', '--extract', help='Extract a tarball.',
344 action='store_const', const='x', dest='action')
345 parser.add_option('-t', '--list', help='List sources in tarball.',
346 action='store_const', const='t', dest='action')
348 # Compression formats
349 parser.add_option('-j', '--bzip2', help='Create a bz2 tarball.',
350 action='store_const', const=':bz2', dest='format', default='')
351 parser.add_option('-z', '--gzip', help='Create a gzip tarball.',
352 action='store_const', const=':gz', dest='format', )
354 parser.add_option('-v', '--verbose', help='Use verbose output.',
355 action='store_true', dest='verbose', default=False)
356 parser.add_option('-f', '--file', help='Name of tarball.',
357 dest='filename', default='')
358 parser.add_option('-C', '--directory', help='Change directory.',
359 dest='cd', default='')
360 parser.add_option('--prefix', help='Subdirectory prefix for all paths')
362 options, args = parser.parse_args(args[1:])
363 if not options.action:
364 parser.error('Expecting compress or extract')
365 if not options.filename:
366 parser.error('Expecting a filename')
368 if options.action in ['c'] and not args:
369 parser.error('Expecting list of sources to add')
370 if options.action in ['x', 't'] and args:
371 parser.error('Unexpected source list on extract')
373 if options.action == 'c':
374 mode = 'w' + options.format
376 mode = 'r'+ options.format
378 tar = CygTar(options.filename, mode, verbose=options.verbose)
382 if options.action == 't':
386 if options.action == 'x':
390 if options.action == 'c':
391 for filepath in args:
392 if not tar.Add(filepath, options.prefix):
397 parser.error('Missing action c, t, or x.')
401 if __name__ == '__main__':
402 sys.exit(Main(sys.argv))