1 # Copyright 2014 The Swarming Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 that
3 # can be found in the LICENSE file.
5 """Understands .isolated files and can do local operations on them."""
15 from utils import file_path
16 from utils import tools
# Format version stored in, and expected from, .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# Number of bytes read per chunk when doing disk I/O.
DISK_FILE_CHUNK = 1024 * 1024
27 # Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
28 # specify the names here.
31 'sha-1': hashlib.sha1,
32 'sha-512': hashlib.sha512,
# Reverse lookup (hashlib algorithm class -> algorithm name), used for
# serialization.
SUPPORTED_ALGOS_REVERSE = dict(
    (algo_class, algo_name)
    for algo_name, algo_class in SUPPORTED_ALGOS.iteritems())
class IsolatedError(ValueError):
  """Raised when a .isolated file fails to load."""
class MappingError(OSError):
  """Raised when the tree could not be recreated."""
def is_valid_hash(value, algo):
  """Returns True if |value| is a valid hex digest for the given algorithm.

  Arguments:
    value: candidate digest; expected to be a string of hexadecimal characters.
    algo: hashlib-style algorithm class (e.g. hashlib.sha1). It is instantiated
        only to read its digest_size.

  Returns:
    True only when |value| is a string made of exactly 2 * digest_size
    hexadecimal characters (upper or lower case). Anything else, including
    non-string values, yields False.
  """
  size = 2 * algo().digest_size
  try:
    # \Z anchors at the very end of the string. '$' also matches right before
    # a trailing newline, which would accept a digest followed by '\n'.
    return bool(re.match(r'^[a-fA-F0-9]{%d}\Z' % size, value))
  except TypeError:
    # |value| is not a string (e.g. an int or None read from json data); it
    # cannot be a valid digest.
    return False
56 def get_hash_algo(_namespace):
57 """Return hash algorithm class to use when uploading to given |namespace|."""
58 # TODO(vadimsh): Implement this at some point.
def is_namespace_with_compression(namespace):
  """Tells whether objects stored in |namespace| are compressed.

  A namespace counts as compressed when its name ends with '-gzip' or
  '-deflate'.
  """
  compressed_suffixes = ('-gzip', '-deflate')
  return namespace.endswith(compressed_suffixes)
67 def hash_file(filepath, algo):
68 """Calculates the hash of a file without reading it all in memory at once.
70 |algo| should be one of hashlib hashing algorithm.
73 with open(filepath, 'rb') as f:
75 chunk = f.read(DISK_FILE_CHUNK)
79 return digest.hexdigest()
82 class IsolatedFile(object):
83 """Represents a single parsed .isolated file."""
85 def __init__(self, obj_hash, algo):
86 """|obj_hash| is really the sha-1 of the file."""
87 self.obj_hash = obj_hash
92 # An IsolatedFile instance, one per object in self.includes.
95 # Set once the .isolated file is loaded.
96 self._is_loaded = False
99 return 'IsolatedFile(%s, loaded: %s)' % (self.obj_hash, self._is_loaded)
101 def load(self, content):
102 """Verifies the .isolated file is valid and loads this object with the json
105 logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
106 assert not self._is_loaded
107 self.data = load_isolated(content, self.algo)
109 IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
111 self._is_loaded = True
115 """Returns True if 'load' was already called."""
116 return self._is_loaded
119 def walk_includes(isolated):
120 """Walks IsolatedFile include graph and yields IsolatedFile objects.
122 Visits root node first, then recursively all children, left to right.
123 Not yet loaded nodes are considered childless.
126 for child in isolated.children:
127 for x in walk_includes(child):
131 def expand_symlinks(indir, relfile):
132 """Follows symlinks in |relfile|, but treating symlinks that point outside the
133 build tree as if they were ordinary directories/files. Returns the final
134 symlink-free target and a list of paths to symlinks encountered in the
137 The rule about symlinks outside the build tree is for the benefit of the
138 Chromium OS ebuild, which symlinks the output directory to an unrelated path
141 Fails when a directory loop is detected, although in theory we could support
144 is_directory = relfile.endswith(os.path.sep)
146 todo = relfile.strip(os.path.sep)
150 pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
153 todo = file_path.fix_native_path_case(done, todo)
154 done = os.path.join(done, todo)
156 symlink_path = os.path.join(done, pre_symlink, symlink)
157 post_symlink = post_symlink.lstrip(os.path.sep)
158 # readlink doesn't exist on Windows.
159 # pylint: disable=E1101
160 target = os.path.normpath(os.path.join(done, pre_symlink))
161 symlink_target = os.readlink(symlink_path)
162 if os.path.isabs(symlink_target):
163 # Absolute paths are considered normal directories. The use case is
164 # generally someone who puts the output directory on a separate drive.
165 target = symlink_target
167 # The symlink itself could be using the wrong path case.
168 target = file_path.fix_native_path_case(target, symlink_target)
170 if not os.path.exists(target):
172 'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
173 target = file_path.get_native_path_case(target)
174 if not file_path.path_starts_with(indir, target):
178 if file_path.path_starts_with(target, symlink_path):
180 'Can\'t map recursive symlink reference %s -> %s' %
181 (symlink_path, target))
182 logging.info('Found symlink: %s -> %s', symlink_path, target)
183 symlinks.append(os.path.relpath(symlink_path, indir))
184 # Treat the common prefix of the old and new paths as done, and start
186 target = target.split(os.path.sep)
187 symlink_path = symlink_path.split(os.path.sep)
189 for target_piece, symlink_path_piece in zip(target, symlink_path):
190 if target_piece == symlink_path_piece:
194 done = os.path.sep.join(target[:prefix_length])
196 os.path.sep.join(target[prefix_length:]), post_symlink)
198 relfile = os.path.relpath(done, indir)
199 relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
200 return relfile, symlinks
203 def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
204 """Expands a single input. It can result in multiple outputs.
206 This function is recursive when relfile is a directory.
208 Note: this code doesn't properly handle recursive symlink like one created
212 if os.path.isabs(relfile):
213 raise MappingError('Can\'t map absolute path %s' % relfile)
215 infile = file_path.normpath(os.path.join(indir, relfile))
216 if not infile.startswith(indir):
217 raise MappingError('Can\'t map file %s outside %s' % (infile, indir))
219 filepath = os.path.join(indir, relfile)
220 native_filepath = file_path.get_native_path_case(filepath)
221 if filepath != native_filepath:
223 if filepath != native_filepath + '.' + os.path.sep:
224 # While it'd be nice to enforce path casing on Windows, it's impractical.
225 # Also give up enforcing strict path case on OSX. Really, it's that sad.
226 # The case where it happens is very specific and hard to reproduce:
227 # get_native_path_case(
228 # u'Foo.framework/Versions/A/Resources/Something.nib') will return
229 # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
231 # Note that this is really something deep in OSX because running
232 # ls Foo.framework/Versions/A
233 # will print out 'Resources', while file_path.get_native_path_case()
234 # returns a lower case 'r'.
236 # So *something* is happening under the hood resulting in the command 'ls'
237 # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
239 if sys.platform not in ('darwin', 'win32'):
241 'File path doesn\'t equal native file path\n%s != %s' %
242 (filepath, native_filepath))
246 relfile, symlinks = expand_symlinks(indir, relfile)
248 if relfile.endswith(os.path.sep):
249 if not os.path.isdir(infile):
251 '%s is not a directory but ends with "%s"' % (infile, os.path.sep))
254 if relfile.startswith('.' + os.path.sep):
255 relfile = relfile[2:]
258 for filename in os.listdir(infile):
259 inner_relfile = os.path.join(relfile, filename)
260 if blacklist and blacklist(inner_relfile):
262 if os.path.isdir(os.path.join(indir, inner_relfile)):
263 inner_relfile += os.path.sep
265 expand_directory_and_symlink(indir, inner_relfile, blacklist,
270 'Unable to iterate over directory %s.\n%s' % (infile, e))
272 # Always add individual files even if they were blacklisted.
273 if os.path.isdir(infile):
275 'Input directory %s must have a trailing slash' % infile)
277 if not os.path.isfile(infile):
278 raise MappingError('Input file %s doesn\'t exist' % infile)
280 return symlinks + [relfile]
283 def expand_directories_and_symlinks(
284 indir, infiles, blacklist, follow_symlinks, ignore_broken_items):
285 """Expands the directories and the symlinks, applies the blacklist and
286 verifies files exist.
288 Files are specified in os native path separator.
291 for relfile in infiles:
294 expand_directory_and_symlink(
295 indir, relfile, blacklist, follow_symlinks))
296 except MappingError as e:
297 if not ignore_broken_items:
299 logging.info('warning: %s', e)
303 def file_to_metadata(filepath, prevdict, read_only, algo):
304 """Processes an input file, a dependency, and return meta data about it.
307 - Retrieves the file mode, file size, file timestamp, file link
308 destination if it is a file link and calculate the SHA-1 of the file's
309 content if the path points to a file and not a symlink.
312 filepath: File to act on.
313 prevdict: the previous dictionary. It is used to retrieve the cached sha-1
314 to skip recalculating the hash. Optional.
315 read_only: If 1 or 2, the file mode is manipulated. In practice, only save
316 one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
317 windows, mode is not set since all files are 'executable' by
319 algo: Hashing algorithm used.
322 The necessary dict to create an entry in the 'files' section of an .isolated
326 # Always check the file stat and check if it is a link. The timestamp is used
327 # to know if the file's content/symlink destination should be looked into.
328 # E.g. only reuse from prevdict if the timestamp hasn't changed.
329 # There is the risk of the file's timestamp being reset to its last value
330 # manually while its content changed. We don't protect against that use case.
332 filestats = os.lstat(filepath)
334 # The file is not present.
335 raise MappingError('%s is missing' % filepath)
336 is_link = stat.S_ISLNK(filestats.st_mode)
338 if sys.platform != 'win32':
339 # Ignore file mode on Windows since it's not really useful there.
340 filemode = stat.S_IMODE(filestats.st_mode)
341 # Remove write access for group and all access to 'others'.
342 filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
344 filemode &= ~stat.S_IWUSR
345 if filemode & stat.S_IXUSR:
346 filemode |= stat.S_IXGRP
348 filemode &= ~stat.S_IXGRP
352 # Used to skip recalculating the hash or link destination. Use the most recent
354 out['t'] = int(round(filestats.st_mtime))
357 out['s'] = filestats.st_size
358 # If the timestamp wasn't updated and the file size is still the same, carry
360 if (prevdict.get('t') == out['t'] and
361 prevdict.get('s') == out['s']):
362 # Reuse the previous hash if available.
363 out['h'] = prevdict.get('h')
365 out['h'] = hash_file(filepath, algo)
367 # If the timestamp wasn't updated, carry on the link destination.
368 if prevdict.get('t') == out['t']:
369 # Reuse the previous link destination if available.
370 out['l'] = prevdict.get('l')
371 if out.get('l') is None:
372 # The link could be in an incorrect path case. In practice, this only
373 # happens on OSX on case insensitive HFS.
374 # TODO(maruel): It'd be better if it was only done once, in
375 # expand_directory_and_symlink(), so it would not be necessary to do again
377 symlink_value = os.readlink(filepath) # pylint: disable=E1101
378 filedir = file_path.get_native_path_case(os.path.dirname(filepath))
379 native_dest = file_path.fix_native_path_case(filedir, symlink_value)
380 out['l'] = os.path.relpath(native_dest, filedir)
384 def save_isolated(isolated, data):
385 """Writes one or multiple .isolated files.
387 Note: this reference implementation does not create child .isolated files so it
388 always returns an empty list.
390 Returns the list of child isolated files that are included by |isolated|.
392 # Make sure the data is valid .isolated data by 'reloading' it.
393 algo = SUPPORTED_ALGOS[data['algo']]
394 load_isolated(json.dumps(data), algo)
395 tools.write_json(isolated, data, True)
399 def load_isolated(content, algo):
400 """Verifies the .isolated file is valid and loads this object with the json
404 - content: raw serialized content to load.
405 - algo: hashlib algorithm class. Used to confirm the algorithm matches the
406 algorithm used on the Isolate Server.
409 data = json.loads(content)
411 raise IsolatedError('Failed to parse: %s...' % content[:100])
413 if not isinstance(data, dict):
414 raise IsolatedError('Expected dict, got %r' % data)
416 # Check 'version' first, since it could modify the parsing after.
417 value = data.get('version', '1.0')
418 if not isinstance(value, basestring):
419 raise IsolatedError('Expected string, got %r' % value)
421 version = tuple(map(int, value.split('.')))
423 raise IsolatedError('Expected valid version, got %r' % value)
425 expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
426 # Major version must match.
427 if version[0] != expected_version[0]:
429 'Expected compatible \'%s\' version, got %r' %
430 (ISOLATED_FILE_VERSION, value))
433 # TODO(maruel): Remove the default around Jan 2014.
434 # Default the algorithm used in the .isolated file itself, falls back to
435 # 'sha-1' if unspecified.
436 algo = SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]
438 for key, value in data.iteritems():
440 if not isinstance(value, basestring):
441 raise IsolatedError('Expected string, got %r' % value)
442 if value not in SUPPORTED_ALGOS:
444 'Expected one of \'%s\', got %r' %
445 (', '.join(sorted(SUPPORTED_ALGOS)), value))
446 if value != SUPPORTED_ALGOS_REVERSE[algo]:
448 'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))
450 elif key == 'command':
451 if not isinstance(value, list):
452 raise IsolatedError('Expected list, got %r' % value)
454 raise IsolatedError('Expected non-empty command')
455 for subvalue in value:
456 if not isinstance(subvalue, basestring):
457 raise IsolatedError('Expected string, got %r' % subvalue)
460 if not isinstance(value, dict):
461 raise IsolatedError('Expected dict, got %r' % value)
462 for subkey, subvalue in value.iteritems():
463 if not isinstance(subkey, basestring):
464 raise IsolatedError('Expected string, got %r' % subkey)
465 if not isinstance(subvalue, dict):
466 raise IsolatedError('Expected dict, got %r' % subvalue)
467 for subsubkey, subsubvalue in subvalue.iteritems():
469 if not isinstance(subsubvalue, basestring):
470 raise IsolatedError('Expected string, got %r' % subsubvalue)
471 elif subsubkey == 'm':
472 if not isinstance(subsubvalue, int):
473 raise IsolatedError('Expected int, got %r' % subsubvalue)
474 elif subsubkey == 'h':
475 if not is_valid_hash(subsubvalue, algo):
476 raise IsolatedError('Expected sha-1, got %r' % subsubvalue)
477 elif subsubkey == 's':
478 if not isinstance(subsubvalue, (int, long)):
479 raise IsolatedError('Expected int or long, got %r' % subsubvalue)
481 raise IsolatedError('Unknown subsubkey %s' % subsubkey)
482 if bool('h' in subvalue) == bool('l' in subvalue):
484 'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
486 if bool('h' in subvalue) != bool('s' in subvalue):
488 'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
490 if bool('s' in subvalue) == bool('l' in subvalue):
492 'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
494 if bool('l' in subvalue) and bool('m' in subvalue):
496 'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
499 elif key == 'includes':
500 if not isinstance(value, list):
501 raise IsolatedError('Expected list, got %r' % value)
503 raise IsolatedError('Expected non-empty includes list')
504 for subvalue in value:
505 if not is_valid_hash(subvalue, algo):
506 raise IsolatedError('Expected sha-1, got %r' % subvalue)
509 if version >= (1, 4):
510 raise IsolatedError('Key \'os\' is not allowed starting version 1.4')
512 elif key == 'read_only':
513 if not value in (0, 1, 2):
514 raise IsolatedError('Expected 0, 1 or 2, got %r' % value)
516 elif key == 'relative_cwd':
517 if not isinstance(value, basestring):
518 raise IsolatedError('Expected string, got %r' % value)
520 elif key == 'version':
521 # Already checked above.
525 raise IsolatedError('Unknown key %r' % key)
527 # Automatically fix os.path.sep if necessary. While .isolated files are always
528 # in the native path format, someone could want to download an .isolated
529 # tree from another OS.
530 wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
532 data['files'] = dict(
533 (k.replace(wrong_path_sep, os.path.sep), v)
534 for k, v in data['files'].iteritems())
535 for v in data['files'].itervalues():
537 v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
538 if 'relative_cwd' in data:
539 data['relative_cwd'] = data['relative_cwd'].replace(
540 wrong_path_sep, os.path.sep)