1 # Copyright 2013 The Swarming Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 that
3 # can be found in the LICENSE file.
5 """Utilities to work with importable python zip packages."""
9 import cStringIO as StringIO
# Regular expressions for file names to exclude from a package by default.
# Each one is applied with re.match() to a bare file or directory name (not to
# a full path), see ZipPackage.add_directory.
EXCLUDE_LIST = (
  # Ignore hidden files (including .svn and .git).
  r'\..*',

  # Ignore precompiled python files since they depend on python version and we
  # don't want zip package to be version-dependent.
  r'.*\.pyc$',
  r'.*\.pyo$',
)


# Temporary files extracted by extract_resource. Removed in atexit hook.
_extracted_files = []
# Guards every access to |_extracted_files|.
_extracted_files_lock = threading.Lock()
class ZipPackageError(RuntimeError):
  """Raised when a zip package can't be assembled."""
class ZipPackage(object):
  """A set of files that can be zipped to file on disk or into memory buffer.

  Usage:
    package = ZipPackage(root)
    package.add_file(os.path.join(root, 'some_file.py'), '__main__.py')
    package.add_directory(os.path.join(root, 'some_directory'))
    package.add_buffer('generated.py', 'any string here')

    buf = package.zip_into_buffer()
    package.zip_into_file('my_zip.zip')
  """

  # Entry whose content is read from the file |abs_path| at zipping time.
  _FileRef = collections.namedtuple('_FileRef', ['abs_path'])
  # Entry whose content is the in-memory string |buffer|.
  _BufferRef = collections.namedtuple('_BufferRef', ['buffer'])

  def __init__(self, root):
    """Initializes new empty ZipPackage.

    All files added to the package should live under the |root|. It will also
    be used when calculating relative paths of files in the package.

    |root| must be an absolute path.
    """
    assert os.path.isabs(root), root
    # Always keep exactly one trailing separator so that prefix checks can't
    # confuse '/a/b' with '/a/bc'.
    self.root = root.rstrip(os.sep) + os.sep
    # Maps path inside the archive -> _FileRef or _BufferRef.
    self._items = {}

  @property
  def files(self):
    """Files added to the package as a list of relative paths in zip."""
    return list(self._items)

  def add_file(self, absolute_path, archive_path=None):
    """Adds a single file to the package.

    |archive_path| is a relative path in archive for this file, by default it's
    equal to |absolute_path| taken relative to |root|. In that case
    |absolute_path| must be in a |root| subtree.

    If |archive_path| is given, |absolute_path| can point to any file.

    Raises ZipPackageError if the file is outside of |root| (and no
    |archive_path| is given), doesn't exist, is not a regular file, or if the
    archive already has an entry with the same path.
    """
    assert os.path.isabs(absolute_path), absolute_path
    absolute_path = os.path.normpath(absolute_path)
    # If |archive_path| is not given, ensure that |absolute_path| is under root.
    if not archive_path and not absolute_path.startswith(self.root):
      raise ZipPackageError(
          'Path %s is not inside root %s' % (absolute_path, self.root))
    if not os.path.exists(absolute_path):
      raise ZipPackageError('No such file: %s' % absolute_path)
    if not os.path.isfile(absolute_path):
      raise ZipPackageError('Object %s is not a regular file' % absolute_path)
    archive_path = archive_path or absolute_path[len(self.root):]
    self._add_entry(archive_path, ZipPackage._FileRef(absolute_path))

  def add_python_file(self, absolute_path, archive_path=None):
    """Adds a single python file to the package.

    Recognizes *.pyc and *.pyo files and adds corresponding *.py file instead.

    Raises ZipPackageError if the extension is none of .py, .pyc, .pyo, or for
    any of the reasons add_file does.
    """
    base, ext = os.path.splitext(absolute_path)
    if ext in ('.pyc', '.pyo'):
      absolute_path = base + '.py'
    elif ext != '.py':
      raise ZipPackageError('Not a python file: %s' % absolute_path)
    self.add_file(absolute_path, archive_path)

  def add_directory(self, absolute_path, archive_path=None, exclude=None):
    """Recursively adds all files from given directory to the package.

    |archive_path| is a relative path in archive for this directory, by default
    it's equal to |absolute_path| taken relative to |root|. In that case
    |absolute_path| must be in |root| subtree.

    If |archive_path| is given, |absolute_path| can point to any directory.

    |exclude| defines a list of regular expressions for file names to exclude
    from the package. Each one is applied with re.match() to the bare name of
    a file or directory; an excluded directory is not descended into. When
    None (the default), EXCLUDE_LIST is used. Pass an empty sequence to
    exclude nothing.

    Only non-empty directories will be actually added to the package.

    Raises ZipPackageError if the directory is outside of |root| (and no
    |archive_path| is given), doesn't exist, is not a directory, or if adding
    any of its files fails.
    """
    assert os.path.isabs(absolute_path), absolute_path
    absolute_path = os.path.normpath(absolute_path).rstrip(os.sep) + os.sep
    # If |archive_path| is not given, ensure that |path| is under root.
    if not archive_path and not absolute_path.startswith(self.root):
      raise ZipPackageError(
          'Path %s is not inside root %s' % (absolute_path, self.root))
    if not os.path.exists(absolute_path):
      raise ZipPackageError('No such directory: %s' % absolute_path)
    if not os.path.isdir(absolute_path):
      raise ZipPackageError('Object %s is not a directory' % absolute_path)

    if exclude is None:
      exclude = EXCLUDE_LIST
    # Precompile regular expressions.
    exclude_regexps = [re.compile(r) for r in exclude]

    def should_exclude(name):
      """Returns True if |name| should be excluded from the package."""
      return any(r.match(name) for r in exclude_regexps)

    archive_path = archive_path or absolute_path[len(self.root):]
    for cur_dir, dirs, files in os.walk(absolute_path):
      # Add all non-excluded files.
      for name in files:
        if should_exclude(name):
          continue
        absolute = os.path.join(cur_dir, name)
        assert absolute.startswith(absolute_path)
        relative = absolute[len(absolute_path):]
        self.add_file(absolute, os.path.join(archive_path, relative))
      # Remove excluded directories from enumeration. os.walk() reads |dirs|
      # back, so it has to be modified in place.
      dirs[:] = [d for d in dirs if not should_exclude(d)]

  def add_buffer(self, archive_path, buf):
    """Adds a contents of the given string |buf| to the package as a file.

    |archive_path| is a path in archive for this file.

    Raises ZipPackageError if the archive already has such entry.
    """
    # Only 'str' is allowed here, no 'unicode'
    assert isinstance(buf, str)
    self._add_entry(archive_path, ZipPackage._BufferRef(buf))

  def zip_into_buffer(self, compress=True):
    """Zips added files into in-memory zip file and returns it as str."""
    stream = StringIO.StringIO()
    try:
      self._zip_into_stream(stream, compress)
      return stream.getvalue()
    finally:
      stream.close()

  def zip_into_file(self, path, compress=True):
    """Zips added files into a file on disk."""
    with open(path, 'wb') as stream:
      self._zip_into_stream(stream, compress)

  def _add_entry(self, archive_path, ref):
    """Adds new zip package entry.

    |ref| is a _FileRef or a _BufferRef that tells where to get the content.

    Raises ZipPackageError if |archive_path| is already taken.
    """
    # Always use forward slashes in zip.
    archive_path = archive_path.replace(os.sep, '/')
    # Ensure there are no suspicious components in the path.
    assert not any(p in ('', '.', '..') for p in archive_path.split('/'))
    # Ensure there's no file overwrites.
    if archive_path in self._items:
      raise ZipPackageError('Duplicated entry: %s' % archive_path)
    self._items[archive_path] = ref

  def _zip_into_stream(self, stream, compress):
    """Zips files added so far into some output stream.

    Some measures are taken to guarantee that final zip depends only on the
    content of added files:
      * File modification time is not stored.
      * Entries are sorted by file name in archive.
    """
    compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
    zip_file = zipfile.ZipFile(stream, 'w', compression)
    try:
      for archive_path in sorted(self._items):
        ref = self._items[archive_path]
        # ZipInfo is built by hand (instead of using ZipFile.write) so that the
        # file's mtime never makes it into the archive.
        info = zipfile.ZipInfo(filename=archive_path)
        info.compress_type = compression
        # 3 is the 'Unix' host id in the zip format; with it the high 16 bits
        # of external_attr carry the file mode bits.
        info.create_system = 3
        if isinstance(ref, ZipPackage._FileRef):
          info.external_attr = (os.stat(ref.abs_path).st_mode & 0xFFFF) << 16
          with open(ref.abs_path, 'rb') as f:
            buf = f.read()
        elif isinstance(ref, ZipPackage._BufferRef):
          buf = ref.buffer
        else:
          # Explicit raise so the check is still there under 'python -O'.
          raise AssertionError('Unexpected type %s' % (ref,))
        zip_file.writestr(info, buf)
    finally:
      zip_file.close()
def get_module_zip_archive(module):
  """Given a module, returns path to a zip package that contains it or None.

  None is returned whenever the module's loader is not a
  zipimport.zipimporter, i.e. the module was not imported from a zip archive.
  """
  # pkgutil.get_loader() starts with this very lookup; doing it here means an
  # already imported module never needs that call (it is deprecated in recent
  # Python 3 releases). It is still used when |module| carries no loader.
  loader = getattr(module, '__loader__', None)
  if loader is None:
    loader = pkgutil.get_loader(module)
  if not isinstance(loader, zipimport.zipimporter):
    return None
  # 'archive' property is documented only for python 2.7, but it appears to be
  # there at least since python 2.5.2.
  return loader.archive
def is_zipped_module(module):
  """Tells whether |module| was imported from inside a zip package."""
  archive = get_module_zip_archive(module)
  return bool(archive)
def get_main_script_path():
  """Returns path to whatever was handed to python for execution.

  That is <main_script> in 'python <main_script>': the zip file itself when
  running from an executable zip package, the __main__ script otherwise.

  The path is the one the interpreter recorded at startup, not resolved against
  the current directory. Returns None when there's no such path at all.
  """
  main_module = sys.modules['__main__']
  archive = get_module_zip_archive(main_module)
  if archive:
    return archive
  # An interactive console has no __file__ on __main__.
  return getattr(main_module, '__file__', None)
def extract_resource(package, resource, temp_dir=None):
  """Returns real file system path to a |resource| file from a |package|.

  If it's inside a zip package, will extract it first into temp file created
  with tempfile.mkstemp. Such file is readable and writable only by the creating
  user ID. It is removed by cleanup_extracted_resources, which is registered
  as an atexit hook on the first extraction.

  |package| is a python module object that represents a package.
  |resource| should be a relative filename, using '/' as the path separator.
  |temp_dir| is a directory to create the temp file in, passed to
  tempfile.mkstemp as is (None means the default temp directory).

  Raises ValueError if no such resource.
  """
  # For regular non-zip packages just construct a path next to the package.
  if not is_zipped_module(package):
    path = os.path.join(os.path.dirname(package.__file__),
                        resource.replace('/', os.sep))
    if not os.path.exists(path):
      raise ValueError('No such resource in %s: %s' % (package, resource))
    return path

  # For zipped packages extract the resource into a temp file.
  try:
    data = pkgutil.get_data(package.__name__, resource)
  except (IOError, OSError):
    # That's how zipimporter reports a file missing from the archive. Turn it
    # into the documented ValueError below.
    data = None
  if data is None:
    raise ValueError('No such resource in zipped %s: %s' % (package, resource))
  fd, path = tempfile.mkstemp(
      suffix='-' + os.path.basename(resource), prefix='.zip_pkg-', dir=temp_dir)
  path = os.path.abspath(path)
  # |data| is raw bytes from the archive: write it in binary mode so nothing
  # (e.g. newline translation) alters it.
  with os.fdopen(fd, 'wb') as stream:
    stream.write(data)

  # Register it for removal when process dies.
  with _extracted_files_lock:
    _extracted_files.append(path)
    # First extracted file -> register atexit hook that cleans them all.
    if len(_extracted_files) == 1:
      atexit.register(cleanup_extracted_resources)

  return path
def cleanup_extracted_resources():
  """Removes all temporary files created by extract_resource.

  Executed as atexit hook. Removal is best effort: a file that can't be
  deleted (e.g. it is already gone) is skipped so the rest still get cleaned.
  """
  with _extracted_files_lock:
    while _extracted_files:
      try:
        os.remove(_extracted_files.pop())
      except OSError:
        pass
def generate_version():
  """Generates the sha-1 based on the content of this zip.

  It is hashing the content of the zip, not the compressed bits. The compression
  has other side effects that kicks in, like zlib's library version, compression
  level, order in which the files were specified, etc.

  Must be called only when __main__ was loaded from a zip package. Returns the
  digest as a hex string.
  """
  assert is_zipped_module(sys.modules['__main__'])
  result = hashlib.sha1()
  # TODO(maruel): This function still has to be compatible with python 2.6. Use
  # a with statement once every bots are upgraded to 2.7.
  z = zipfile.ZipFile(get_main_script_path(), 'r')
  try:
    # Sorted, so the order the files were zipped in doesn't affect the hash.
    for item in sorted(z.namelist()):
      # NUL separators keep 'name + content' pairs unambiguous.
      result.update(item)
      result.update('\x00')
      result.update(z.read(item))
      result.update('\x00')
  finally:
    # Don't leak the archive handle if a member can't be read.
    z.close()
  return result.hexdigest()