src/tools/swarming_client/isolate.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 """Front end tool to operate on .isolate files.
   7
   8 This includes creating, merging or compiling them to generate a .isolated file.
   9
  10 See more information at
  11   https://code.google.com/p/swarming/wiki/IsolateDesign
  12   https://code.google.com/p/swarming/wiki/IsolateUserGuide
  13 """
  14 # Run ./isolate.py --help for more detailed information.
  15
  16 import ast
  17 import copy
  18 import itertools
  19 import logging
  20 import optparse
  21 import os
  22 import posixpath
  23 import re
  24 import stat
  25 import subprocess
  26 import sys
  27
  28 import isolateserver
  29 import run_isolated
  30 import trace_inputs
  31
  32 # Import here directly so isolate is easier to use as a library.
  33 from run_isolated import get_flavor
  34
  35 from third_party import colorama
  36 from third_party.depot_tools import fix_encoding
  37 from third_party.depot_tools import subcommand
  38
  39 from utils import file_path
  40 from utils import tools
  41 from utils import short_expression_finder
  42
  43
# Version of this tool.
__version__ = '0.1.1'


# Names of the variables whose values are directory paths. They get special
# treatment: process_variables() normalizes them and generate_simplified()
# substitutes them back into file paths.
PATH_VARIABLES = ('DEPTH', 'PRODUCT_DIR')

# Keys used in the 'variables' dictionary of a .isolate file.
# Files that should be 0-length when mapped.
KEY_TOUCHED = 'isolate_dependency_touched'
# Files that should be tracked by the build tool.
KEY_TRACKED = 'isolate_dependency_tracked'
# Files that should not be tracked by the build tool.
KEY_UNTRACKED = 'isolate_dependency_untracked'
  55
  56
  57 class ExecutionError(Exception):
  58   """A generic error occurred."""
  59   def __str__(self):
  60     return self.args[0]
  61
  62
  63 ### Path handling code.
  64
  65
  66 DEFAULT_BLACKLIST = (
  67   # Temporary vim or python files.
  68   r'^.+\.(?:pyc|swp)$',
  69   # .git or .svn directory.
  70   r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
  71 )
  72
  73
  74 # Chromium-specific.
  75 DEFAULT_BLACKLIST += (
  76   r'^.+\.(?:run_test_cases)$',
  77   r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
  78 )
  79
  80
  81 def relpath(path, root):
  82   """os.path.relpath() that keeps trailing os.path.sep."""
  83   out = os.path.relpath(path, root)
  84   if path.endswith(os.path.sep):
  85     out += os.path.sep
  86   return out
  87
  88
  89 def safe_relpath(filepath, basepath):
  90   """Do not throw on Windows when filepath and basepath are on different drives.
  91
  92   Different than relpath() above since this one doesn't keep the trailing
  93   os.path.sep and it swallows exceptions on Windows and return the original
  94   absolute path in the case of different drives.
  95   """
  96   try:
  97     return os.path.relpath(filepath, basepath)
  98   except ValueError:
  99     assert sys.platform == 'win32'
 100     return filepath
 101
 102
 103 def normpath(path):
 104   """os.path.normpath() that keeps trailing os.path.sep."""
 105   out = os.path.normpath(path)
 106   if path.endswith(os.path.sep):
 107     out += os.path.sep
 108   return out
 109
 110
 111 def posix_relpath(path, root):
 112   """posix.relpath() that keeps trailing slash."""
 113   out = posixpath.relpath(path, root)
 114   if path.endswith('/'):
 115     out += '/'
 116   return out
 117
 118
 119 def cleanup_path(x):
 120   """Cleans up a relative path. Converts any os.path.sep to '/' on Windows."""
 121   if x:
 122     x = x.rstrip(os.path.sep).replace(os.path.sep, '/')
 123   if x == '.':
 124     x = ''
 125   if x:
 126     x += '/'
 127   return x
 128
 129
 130 def is_url(path):
 131   return bool(re.match(r'^https?://.+$', path))
 132
 133
 134 def path_starts_with(prefix, path):
 135   """Returns true if the components of the path |prefix| are the same as the
 136   initial components of |path| (or all of the components of |path|). The paths
 137   must be absolute.
 138   """
 139   assert os.path.isabs(prefix) and os.path.isabs(path)
 140   prefix = os.path.normpath(prefix)
 141   path = os.path.normpath(path)
 142   assert prefix == file_path.get_native_path_case(prefix), prefix
 143   assert path == file_path.get_native_path_case(path), path
 144   prefix = prefix.rstrip(os.path.sep) + os.path.sep
 145   path = path.rstrip(os.path.sep) + os.path.sep
 146   return path.startswith(prefix)
 147
 148
 149 def fix_native_path_case(root, path):
 150   """Ensures that each component of |path| has the proper native case by
 151      iterating slowly over the directory elements of |path|."""
 152   native_case_path = root
 153   for raw_part in path.split(os.sep):
 154     if not raw_part or raw_part == '.':
 155       break
 156
 157     part = file_path.find_item_native_case(native_case_path, raw_part)
 158     if not part:
 159       raise isolateserver.MappingError(
 160           'Input file %s doesn\'t exist' %
 161           os.path.join(native_case_path, raw_part))
 162     native_case_path = os.path.join(native_case_path, part)
 163
 164   return os.path.normpath(native_case_path)
 165
 166
def expand_symlinks(indir, relfile):
  """Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.

  Arguments:
  - indir: root directory; |relfile| and the returned paths are relative to it.
  - relfile: relative path to resolve. A trailing os.path.sep marks it as a
             directory and is preserved on the returned path.

  Returns:
    tuple(relfile, symlinks) where relfile is the resolved relative path and
    symlinks is the list of the symlinks followed, relative to |indir|.

  Raises:
    isolateserver.MappingError if a symlink target doesn't exist or if a
    symlink points to one of its own parent directories.
  """
  is_directory = relfile.endswith(os.path.sep)
  # |done| is the part of the path already resolved, |todo| is what remains to
  # be looked at, relative to |done|.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
        done, todo)
    if not symlink:
      # No symlink left in |todo|; only its path case remains to be fixed.
      todo = fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = os.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = fix_native_path_case(target, symlink_target)

    if not os.path.exists(target):
      raise isolateserver.MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not path_starts_with(indir, target):
      # The target is outside |indir|: keep the symlink in the path as if it
      # was a regular item and continue with what follows it.
      done = symlink_path
      todo = post_symlink
      continue
    if path_starts_with(target, symlink_path):
      # The symlink points to one of its own parent directories.
      raise isolateserver.MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece == symlink_path_piece:
        prefix_length += 1
      else:
        break
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  # is_directory is a bool; multiplying the separator by it appends either one
  # separator or nothing.
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
 237
 238
 239 def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
 240   """Expands a single input. It can result in multiple outputs.
 241
 242   This function is recursive when relfile is a directory.
 243
 244   Note: this code doesn't properly handle recursive symlink like one created
 245   with:
 246     ln -s .. foo
 247   """
 248   if os.path.isabs(relfile):
 249     raise isolateserver.MappingError(
 250         'Can\'t map absolute path %s' % relfile)
 251
 252   infile = normpath(os.path.join(indir, relfile))
 253   if not infile.startswith(indir):
 254     raise isolateserver.MappingError(
 255         'Can\'t map file %s outside %s' % (infile, indir))
 256
 257   filepath = os.path.join(indir, relfile)
 258   native_filepath = file_path.get_native_path_case(filepath)
 259   if filepath != native_filepath:
 260     # Special case './'.
 261     if filepath != native_filepath + '.' + os.path.sep:
 262       # Give up enforcing strict path case on OSX. Really, it's that sad. The
 263       # case where it happens is very specific and hard to reproduce:
 264       # get_native_path_case(
 265       #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
 266       # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
 267       #
 268       # Note that this is really something deep in OSX because running
 269       # ls Foo.framework/Versions/A
 270       # will print out 'Resources', while file_path.get_native_path_case()
 271       # returns a lower case 'r'.
 272       #
 273       # So *something* is happening under the hood resulting in the command 'ls'
 274       # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.  We
 275       # have no idea why.
 276       if sys.platform != 'darwin':
 277         raise isolateserver.MappingError(
 278             'File path doesn\'t equal native file path\n%s != %s' %
 279             (filepath, native_filepath))
 280
 281   symlinks = []
 282   if follow_symlinks:
 283     relfile, symlinks = expand_symlinks(indir, relfile)
 284
 285   if relfile.endswith(os.path.sep):
 286     if not os.path.isdir(infile):
 287       raise isolateserver.MappingError(
 288           '%s is not a directory but ends with "%s"' % (infile, os.path.sep))
 289
 290     # Special case './'.
 291     if relfile.startswith('.' + os.path.sep):
 292       relfile = relfile[2:]
 293     outfiles = symlinks
 294     try:
 295       for filename in os.listdir(infile):
 296         inner_relfile = os.path.join(relfile, filename)
 297         if blacklist(inner_relfile):
 298           continue
 299         if os.path.isdir(os.path.join(indir, inner_relfile)):
 300           inner_relfile += os.path.sep
 301         outfiles.extend(
 302             expand_directory_and_symlink(indir, inner_relfile, blacklist,
 303                                          follow_symlinks))
 304       return outfiles
 305     except OSError as e:
 306       raise isolateserver.MappingError(
 307           'Unable to iterate over directory %s.\n%s' % (infile, e))
 308   else:
 309     # Always add individual files even if they were blacklisted.
 310     if os.path.isdir(infile):
 311       raise isolateserver.MappingError(
 312           'Input directory %s must have a trailing slash' % infile)
 313
 314     if not os.path.isfile(infile):
 315       raise isolateserver.MappingError(
 316           'Input file %s doesn\'t exist' % infile)
 317
 318     return symlinks + [relfile]
 319
 320
 321 def expand_directories_and_symlinks(indir, infiles, blacklist,
 322                                     follow_symlinks, ignore_broken_items):
 323   """Expands the directories and the symlinks, applies the blacklist and
 324   verifies files exist.
 325
 326   Files are specified in os native path separator.
 327   """
 328   outfiles = []
 329   for relfile in infiles:
 330     try:
 331       outfiles.extend(expand_directory_and_symlink(indir, relfile, blacklist,
 332                                                    follow_symlinks))
 333     except isolateserver.MappingError as e:
 334       if ignore_broken_items:
 335         logging.info('warning: %s', e)
 336       else:
 337         raise
 338   return outfiles
 339
 340
 341 def recreate_tree(outdir, indir, infiles, action, as_hash):
 342   """Creates a new tree with only the input files in it.
 343
 344   Arguments:
 345     outdir:    Output directory to create the files in.
 346     indir:     Root directory the infiles are based in.
 347     infiles:   dict of files to map from |indir| to |outdir|.
 348     action:    One of accepted action of run_isolated.link_file().
 349     as_hash:   Output filename is the hash instead of relfile.
 350   """
 351   logging.info(
 352       'recreate_tree(outdir=%s, indir=%s, files=%d, action=%s, as_hash=%s)' %
 353       (outdir, indir, len(infiles), action, as_hash))
 354
 355   assert os.path.isabs(outdir) and outdir == os.path.normpath(outdir), outdir
 356   if not os.path.isdir(outdir):
 357     logging.info('Creating %s' % outdir)
 358     os.makedirs(outdir)
 359
 360   for relfile, metadata in infiles.iteritems():
 361     infile = os.path.join(indir, relfile)
 362     if as_hash:
 363       # Do the hashtable specific checks.
 364       if 'l' in metadata:
 365         # Skip links when storing a hashtable.
 366         continue
 367       outfile = os.path.join(outdir, metadata['h'])
 368       if os.path.isfile(outfile):
 369         # Just do a quick check that the file size matches. No need to stat()
 370         # again the input file, grab the value from the dict.
 371         if not 's' in metadata:
 372           raise isolateserver.MappingError(
 373               'Misconfigured item %s: %s' % (relfile, metadata))
 374         if metadata['s'] == os.stat(outfile).st_size:
 375           continue
 376         else:
 377           logging.warn('Overwritting %s' % metadata['h'])
 378           os.remove(outfile)
 379     else:
 380       outfile = os.path.join(outdir, relfile)
 381       outsubdir = os.path.dirname(outfile)
 382       if not os.path.isdir(outsubdir):
 383         os.makedirs(outsubdir)
 384
 385     # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
 386     # if metadata.get('T') == True:
 387     #   open(outfile, 'ab').close()
 388     if 'l' in metadata:
 389       pointed = metadata['l']
 390       logging.debug('Symlink: %s -> %s' % (outfile, pointed))
 391       # symlink doesn't exist on Windows.
 392       os.symlink(pointed, outfile)  # pylint: disable=E1101
 393     else:
 394       run_isolated.link_file(outfile, infile, action)
 395
 396
def process_input(filepath, prevdict, read_only, flavor, algo):
  """Processes an input file, a dependency, and return meta data about it.

  Arguments:
  - filepath: File to act on.
  - prevdict: the previous dictionary. It is used to retrieve the cached sha-1
              to skip recalculating the hash.
  - read_only: If True, the user write bit is removed from the saved file
               mode. In practice, only save one of 4 modes: 0755 (rwx), 0644
               (rw), 0555 (rx), 0444 (r). On windows, mode is not set since
               all files are 'executable' by default.
  - flavor:    Platform flavor. When it is 'win', the file mode is not saved.
  - algo:      Hashing algorithm used.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calculate the SHA-1 of the file's
    content if the path points to a file and not a symlink.

  Returns:
    dict with the keys 't' (rounded modification time) and, for a regular
    file, 's' (size), 'h' (content hash) and 'm' (file mode, not on 'win'), or
    for a symlink, 'l' (link destination relative to the link's directory).

  Raises:
    isolateserver.MappingError if |filepath| can't be stat'ed.
  """
  out = {}
  # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
  # if prevdict.get('T') == True:
  #   # The file's content is ignored. Skip the time and hard code mode.
  #   if get_flavor() != 'win':
  #     out['m'] = stat.S_IRUSR | stat.S_IRGRP
  #   out['s'] = 0
  #   out['h'] = algo().hexdigest()
  #   out['T'] = True
  #   return out

  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    filestats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise isolateserver.MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if flavor != 'win':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    # The group executable bit always mirrors the user executable bit.
    if filemode & stat.S_IXUSR:
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    # The mode is not saved for a symlink.
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  # TODO(maruel): Save it in the .state file instead of .isolated so the
  # .isolated file is deterministic.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the sha-1.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    # Also covers the case where prevdict had no usable hash.
    if not out.get('h'):
      out['h'] = isolateserver.hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = os.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
 484
 485
 486 ### Variable stuff.
 487
 488
 489 def isolatedfile_to_state(filename):
 490   """Replaces the file's extension."""
 491   return filename + '.state'
 492
 493
 494 def determine_root_dir(relative_root, infiles):
 495   """For a list of infiles, determines the deepest root directory that is
 496   referenced indirectly.
 497
 498   All arguments must be using os.path.sep.
 499   """
 500   # The trick used to determine the root directory is to look at "how far" back
 501   # up it is looking up.
 502   deepest_root = relative_root
 503   for i in infiles:
 504     x = relative_root
 505     while i.startswith('..' + os.path.sep):
 506       i = i[3:]
 507       assert not i.startswith(os.path.sep)
 508       x = os.path.dirname(x)
 509     if deepest_root.startswith(x):
 510       deepest_root = x
 511   logging.debug(
 512       'determine_root_dir(%s, %d files) -> %s' % (
 513           relative_root, len(infiles), deepest_root))
 514   return deepest_root
 515
 516
 517 def replace_variable(part, variables):
 518   m = re.match(r'<\(([A-Z_]+)\)', part)
 519   if m:
 520     if m.group(1) not in variables:
 521       raise ExecutionError(
 522         'Variable "%s" was not found in %s.\nDid you forget to specify '
 523         '--variable?' % (m.group(1), variables))
 524     return variables[m.group(1)]
 525   return part
 526
 527
 528 def process_variables(cwd, variables, relative_base_dir):
 529   """Processes path variables as a special case and returns a copy of the dict.
 530
 531   For each 'path' variable: first normalizes it based on |cwd|, verifies it
 532   exists then sets it as relative to relative_base_dir.
 533   """
 534   relative_base_dir = file_path.get_native_path_case(relative_base_dir)
 535   variables = variables.copy()
 536   for i in PATH_VARIABLES:
 537     if i not in variables:
 538       continue
 539     variable = variables[i].strip()
 540     # Variables could contain / or \ on windows. Always normalize to
 541     # os.path.sep.
 542     variable = variable.replace('/', os.path.sep)
 543     variable = os.path.join(cwd, variable)
 544     variable = os.path.normpath(variable)
 545     variable = file_path.get_native_path_case(variable)
 546     if not os.path.isdir(variable):
 547       raise ExecutionError('%s=%s is not a directory' % (i, variable))
 548
 549     # All variables are relative to the .isolate file.
 550     variable = os.path.relpath(variable, relative_base_dir)
 551     logging.debug(
 552         'Translated variable %s from %s to %s', i, variables[i], variable)
 553     variables[i] = variable
 554   return variables
 555
 556
 557 def eval_variables(item, variables):
 558   """Replaces the .isolate variables in a string item.
 559
 560   Note that the .isolate format is a subset of the .gyp dialect.
 561   """
 562   return ''.join(
 563       replace_variable(p, variables) for p in re.split(r'(<\([A-Z_]+\))', item))
 564
 565
 566 def classify_files(root_dir, tracked, untracked):
 567   """Converts the list of files into a .isolate 'variables' dictionary.
 568
 569   Arguments:
 570   - tracked: list of files names to generate a dictionary out of that should
 571              probably be tracked.
 572   - untracked: list of files names that must not be tracked.
 573   """
 574   # These directories are not guaranteed to be always present on every builder.
 575   OPTIONAL_DIRECTORIES = (
 576     'test/data/plugin',
 577     'third_party/WebKit/LayoutTests',
 578   )
 579
 580   new_tracked = []
 581   new_untracked = list(untracked)
 582
 583   def should_be_tracked(filepath):
 584     """Returns True if it is a file without whitespace in a non-optional
 585     directory that has no symlink in its path.
 586     """
 587     if filepath.endswith('/'):
 588       return False
 589     if ' ' in filepath:
 590       return False
 591     if any(i in filepath for i in OPTIONAL_DIRECTORIES):
 592       return False
 593     # Look if any element in the path is a symlink.
 594     split = filepath.split('/')
 595     for i in range(len(split)):
 596       if os.path.islink(os.path.join(root_dir, '/'.join(split[:i+1]))):
 597         return False
 598     return True
 599
 600   for filepath in sorted(tracked):
 601     if should_be_tracked(filepath):
 602       new_tracked.append(filepath)
 603     else:
 604       # Anything else.
 605       new_untracked.append(filepath)
 606
 607   variables = {}
 608   if new_tracked:
 609     variables[KEY_TRACKED] = sorted(new_tracked)
 610   if new_untracked:
 611     variables[KEY_UNTRACKED] = sorted(new_untracked)
 612   return variables
 613
 614
 615 def chromium_fix(f, variables):
 616   """Fixes an isolate dependnecy with Chromium-specific fixes."""
 617   # Skip log in PRODUCT_DIR. Note that these are applied on '/' style path
 618   # separator.
 619   LOG_FILE = re.compile(r'^\<\(PRODUCT_DIR\)\/[^\/]+\.log$')
 620   # Ignored items.
 621   IGNORED_ITEMS = (
 622       # http://crbug.com/160539, on Windows, it's in chrome/.
 623       'Media Cache/',
 624       'chrome/Media Cache/',
 625       # 'First Run' is not created by the compile, but by the test itself.
 626       '<(PRODUCT_DIR)/First Run')
 627
 628   # Blacklist logs and other unimportant files.
 629   if LOG_FILE.match(f) or f in IGNORED_ITEMS:
 630     logging.debug('Ignoring %s', f)
 631     return None
 632
 633   EXECUTABLE = re.compile(
 634       r'^(\<\(PRODUCT_DIR\)\/[^\/\.]+)' +
 635       re.escape(variables.get('EXECUTABLE_SUFFIX', '')) +
 636       r'$')
 637   match = EXECUTABLE.match(f)
 638   if match:
 639     return match.group(1) + '<(EXECUTABLE_SUFFIX)'
 640
 641   if sys.platform == 'darwin':
 642     # On OSX, the name of the output is dependent on gyp define, it can be
 643     # 'Google Chrome.app' or 'Chromium.app', same for 'XXX
 644     # Framework.framework'. Furthermore, they are versioned with a gyp
 645     # variable.  To lower the complexity of the .isolate file, remove all the
 646     # individual entries that show up under any of the 4 entries and replace
 647     # them with the directory itself. Overall, this results in a bit more
 648     # files than strictly necessary.
 649     OSX_BUNDLES = (
 650       '<(PRODUCT_DIR)/Chromium Framework.framework/',
 651       '<(PRODUCT_DIR)/Chromium.app/',
 652       '<(PRODUCT_DIR)/Google Chrome Framework.framework/',
 653       '<(PRODUCT_DIR)/Google Chrome.app/',
 654     )
 655     for prefix in OSX_BUNDLES:
 656       if f.startswith(prefix):
 657         # Note this result in duplicate values, so the a set() must be used to
 658         # remove duplicates.
 659         return prefix
 660   return f
 661
 662
 663 def generate_simplified(
 664     tracked, untracked, touched, root_dir, variables, relative_cwd,
 665     trace_blacklist):
 666   """Generates a clean and complete .isolate 'variables' dictionary.
 667
 668   Cleans up and extracts only files from within root_dir then processes
 669   variables and relative_cwd.
 670   """
 671   root_dir = os.path.realpath(root_dir)
 672   logging.info(
 673       'generate_simplified(%d files, %s, %s, %s)' %
 674       (len(tracked) + len(untracked) + len(touched),
 675         root_dir, variables, relative_cwd))
 676
 677   # Preparation work.
 678   relative_cwd = cleanup_path(relative_cwd)
 679   assert not os.path.isabs(relative_cwd), relative_cwd
 680   # Creates the right set of variables here. We only care about PATH_VARIABLES.
 681   path_variables = dict(
 682       ('<(%s)' % k, variables[k].replace(os.path.sep, '/'))
 683       for k in PATH_VARIABLES if k in variables)
 684   variables = variables.copy()
 685   variables.update(path_variables)
 686
 687   # Actual work: Process the files.
 688   # TODO(maruel): if all the files in a directory are in part tracked and in
 689   # part untracked, the directory will not be extracted. Tracked files should be
 690   # 'promoted' to be untracked as needed.
 691   tracked = trace_inputs.extract_directories(
 692       root_dir, tracked, trace_blacklist)
 693   untracked = trace_inputs.extract_directories(
 694       root_dir, untracked, trace_blacklist)
 695   # touched is not compressed, otherwise it would result in files to be archived
 696   # that we don't need.
 697
 698   root_dir_posix = root_dir.replace(os.path.sep, '/')
 699   def fix(f):
 700     """Bases the file on the most restrictive variable."""
 701     # Important, GYP stores the files with / and not \.
 702     f = f.replace(os.path.sep, '/')
 703     logging.debug('fix(%s)' % f)
 704     # If it's not already a variable.
 705     if not f.startswith('<'):
 706       # relative_cwd is usually the directory containing the gyp file. It may be
 707       # empty if the whole directory containing the gyp file is needed.
 708       # Use absolute paths in case cwd_dir is outside of root_dir.
 709       # Convert the whole thing to / since it's isolate's speak.
 710       f = posix_relpath(
 711           posixpath.join(root_dir_posix, f),
 712           posixpath.join(root_dir_posix, relative_cwd)) or './'
 713
 714     for variable, root_path in path_variables.iteritems():
 715       if f.startswith(root_path):
 716         f = variable + f[len(root_path):]
 717         logging.debug('Converted to %s' % f)
 718         break
 719     return f
 720
 721   def fix_all(items):
 722     """Reduces the items to convert variables, removes unneeded items, apply
 723     chromium-specific fixes and only return unique items.
 724     """
 725     variables_converted = (fix(f.path) for f in items)
 726     chromium_fixed = (chromium_fix(f, variables) for f in variables_converted)
 727     return set(f for f in chromium_fixed if f)
 728
 729   tracked = fix_all(tracked)
 730   untracked = fix_all(untracked)
 731   touched = fix_all(touched)
 732   out = classify_files(root_dir, tracked, untracked)
 733   if touched:
 734     out[KEY_TOUCHED] = sorted(touched)
 735   return out
 736
 737
 738 def chromium_filter_flags(variables):
 739   """Filters out build flags used in Chromium that we don't want to treat as
 740   configuration variables.
 741   """
 742   # TODO(benrg): Need a better way to determine this.
 743   blacklist = set(PATH_VARIABLES + ('EXECUTABLE_SUFFIX', 'FLAG'))
 744   return dict((k, v) for k, v in variables.iteritems() if k not in blacklist)
 745
 746
 747 def generate_isolate(
 748     tracked, untracked, touched, root_dir, variables, relative_cwd,
 749     trace_blacklist):
 750   """Generates a clean and complete .isolate file."""
 751   dependencies = generate_simplified(
 752       tracked, untracked, touched, root_dir, variables, relative_cwd,
 753       trace_blacklist)
 754   config_variables = chromium_filter_flags(variables)
 755   config_variable_names, config_values = zip(
 756       *sorted(config_variables.iteritems()))
 757   out = Configs(None)
 758   # The new dependencies apply to just one configuration, namely config_values.
 759   out.merge_dependencies(dependencies, config_variable_names, [config_values])
 760   return out.make_isolate_file()
 761
 762
 763 def split_touched(files):
 764   """Splits files that are touched vs files that are read."""
 765   tracked = []
 766   touched = []
 767   for f in files:
 768     if f.size:
 769       tracked.append(f)
 770     else:
 771       touched.append(f)
 772   return tracked, touched
 773
 774
 775 def pretty_print(variables, stdout):
 776   """Outputs a gyp compatible list from the decoded variables.
 777
 778   Similar to pprint.print() but with NIH syndrome.
 779   """
 780   # Order the dictionary keys by these keys in priority.
 781   ORDER = (
 782       'variables', 'condition', 'command', 'relative_cwd', 'read_only',
 783       KEY_TRACKED, KEY_UNTRACKED)
 784
 785   def sorting_key(x):
 786     """Gives priority to 'most important' keys before the others."""
 787     if x in ORDER:
 788       return str(ORDER.index(x))
 789     return x
 790
 791   def loop_list(indent, items):
 792     for item in items:
 793       if isinstance(item, basestring):
 794         stdout.write('%s\'%s\',\n' % (indent, item))
 795       elif isinstance(item, dict):
 796         stdout.write('%s{\n' % indent)
 797         loop_dict(indent + '  ', item)
 798         stdout.write('%s},\n' % indent)
 799       elif isinstance(item, list):
 800         # A list inside a list will write the first item embedded.
 801         stdout.write('%s[' % indent)
 802         for index, i in enumerate(item):
 803           if isinstance(i, basestring):
 804             stdout.write(
 805                 '\'%s\', ' % i.replace('\\', '\\\\').replace('\'', '\\\''))
 806           elif isinstance(i, dict):
 807             stdout.write('{\n')
 808             loop_dict(indent + '  ', i)
 809             if index != len(item) - 1:
 810               x = ', '
 811             else:
 812               x = ''
 813             stdout.write('%s}%s' % (indent, x))
 814           else:
 815             assert False
 816         stdout.write('],\n')
 817       else:
 818         assert False
 819
 820   def loop_dict(indent, items):
 821     for key in sorted(items, key=sorting_key):
 822       item = items[key]
 823       stdout.write("%s'%s': " % (indent, key))
 824       if isinstance(item, dict):
 825         stdout.write('{\n')
 826         loop_dict(indent + '  ', item)
 827         stdout.write(indent + '},\n')
 828       elif isinstance(item, list):
 829         stdout.write('[\n')
 830         loop_list(indent + '  ', item)
 831         stdout.write(indent + '],\n')
 832       elif isinstance(item, basestring):
 833         stdout.write(
 834             '\'%s\',\n' % item.replace('\\', '\\\\').replace('\'', '\\\''))
 835       elif item in (True, False, None):
 836         stdout.write('%s\n' % item)
 837       else:
 838         assert False, item
 839
 840   stdout.write('{\n')
 841   loop_dict('  ', variables)
 842   stdout.write('}\n')
 843
 844
 845 def union(lhs, rhs):
 846   """Merges two compatible datastructures composed of dict/list/set."""
 847   assert lhs is not None or rhs is not None
 848   if lhs is None:
 849     return copy.deepcopy(rhs)
 850   if rhs is None:
 851     return copy.deepcopy(lhs)
 852   assert type(lhs) == type(rhs), (lhs, rhs)
 853   if hasattr(lhs, 'union'):
 854     # Includes set, ConfigSettings and Configs.
 855     return lhs.union(rhs)
 856   if isinstance(lhs, dict):
 857     return dict((k, union(lhs.get(k), rhs.get(k))) for k in set(lhs).union(rhs))
 858   elif isinstance(lhs, list):
 859     # Do not go inside the list.
 860     return lhs + rhs
 861   assert False, type(lhs)
 862
 863
 864 def extract_comment(content):
 865   """Extracts file level comment."""
 866   out = []
 867   for line in content.splitlines(True):
 868     if line.startswith('#'):
 869       out.append(line)
 870     else:
 871       break
 872   return ''.join(out)
 873
 874
 875 def eval_content(content):
 876   """Evaluates a python file and return the value defined in it.
 877
 878   Used in practice for .isolate files.
 879   """
 880   globs = {'__builtins__': None}
 881   locs = {}
 882   try:
 883     value = eval(content, globs, locs)
 884   except TypeError as e:
 885     e.args = list(e.args) + [content]
 886     raise
 887   assert locs == {}, locs
 888   assert globs == {'__builtins__': None}, globs
 889   return value
 890
 891
 892 def match_configs(expr, config_variables, all_configs):
 893   """Returns the configs from |all_configs| that match the |expr|, where
 894   the elements of |all_configs| are tuples of values for the |config_variables|.
 895   Example:
 896   >>> match_configs(expr = "(foo==1 or foo==2) and bar=='b'",
 897                     config_variables = ["foo", "bar"],
 898                     all_configs = [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')])
 899   [(1, 'b'), (2, 'b')]
 900   """
 901   return [
 902     config for config in all_configs
 903     if eval(expr, dict(zip(config_variables, config)))
 904   ]
 905
 906
 907 def verify_variables(variables):
 908   """Verifies the |variables| dictionary is in the expected format."""
 909   VALID_VARIABLES = [
 910     KEY_TOUCHED,
 911     KEY_TRACKED,
 912     KEY_UNTRACKED,
 913     'command',
 914     'read_only',
 915   ]
 916   assert isinstance(variables, dict), variables
 917   assert set(VALID_VARIABLES).issuperset(set(variables)), variables.keys()
 918   for name, value in variables.iteritems():
 919     if name == 'read_only':
 920       assert value in (True, False, None), value
 921     else:
 922       assert isinstance(value, list), value
 923       assert all(isinstance(i, basestring) for i in value), value
 924
 925
 926 def verify_ast(expr, variables_and_values):
 927   """Verifies that |expr| is of the form
 928   expr ::= expr ( "or" | "and" ) expr
 929          | identifier "==" ( string | int )
 930   Also collects the variable identifiers and string/int values in the dict
 931   |variables_and_values|, in the form {'var': set([val1, val2, ...]), ...}.
 932   """
 933   assert isinstance(expr, (ast.BoolOp, ast.Compare))
 934   if isinstance(expr, ast.BoolOp):
 935     assert isinstance(expr.op, (ast.And, ast.Or))
 936     for subexpr in expr.values:
 937       verify_ast(subexpr, variables_and_values)
 938   else:
 939     assert isinstance(expr.left.ctx, ast.Load)
 940     assert len(expr.ops) == 1
 941     assert isinstance(expr.ops[0], ast.Eq)
 942     var_values = variables_and_values.setdefault(expr.left.id, set())
 943     rhs = expr.comparators[0]
 944     assert isinstance(rhs, (ast.Str, ast.Num))
 945     var_values.add(rhs.n if isinstance(rhs, ast.Num) else rhs.s)
 946
 947
 948 def verify_condition(condition, variables_and_values):
 949   """Verifies the |condition| dictionary is in the expected format.
 950   See verify_ast() for the meaning of |variables_and_values|.
 951   """
 952   VALID_INSIDE_CONDITION = ['variables']
 953   assert isinstance(condition, list), condition
 954   assert len(condition) == 2, condition
 955   expr, then = condition
 956
 957   test_ast = compile(expr, '<condition>', 'eval', ast.PyCF_ONLY_AST)
 958   verify_ast(test_ast.body, variables_and_values)
 959
 960   assert isinstance(then, dict), then
 961   assert set(VALID_INSIDE_CONDITION).issuperset(set(then)), then.keys()
 962   verify_variables(then['variables'])
 963
 964
 965 def verify_root(value, variables_and_values):
 966   """Verifies that |value| is the parsed form of a valid .isolate file.
 967   See verify_ast() for the meaning of |variables_and_values|.
 968   """
 969   VALID_ROOTS = ['includes', 'conditions']
 970   assert isinstance(value, dict), value
 971   assert set(VALID_ROOTS).issuperset(set(value)), value.keys()
 972
 973   includes = value.get('includes', [])
 974   assert isinstance(includes, list), includes
 975   for include in includes:
 976     assert isinstance(include, basestring), include
 977
 978   conditions = value.get('conditions', [])
 979   assert isinstance(conditions, list), conditions
 980   for condition in conditions:
 981     verify_condition(condition, variables_and_values)
 982
 983
 984 def remove_weak_dependencies(values, key, item, item_configs):
 985   """Removes any configs from this key if the item is already under a
 986   strong key.
 987   """
 988   if key == KEY_TOUCHED:
 989     item_configs = set(item_configs)
 990     for stronger_key in (KEY_TRACKED, KEY_UNTRACKED):
 991       try:
 992         item_configs -= values[stronger_key][item]
 993       except KeyError:
 994         pass
 995
 996   return item_configs
 997
 998
 999 def remove_repeated_dependencies(folders, key, item, item_configs):
1000   """Removes any configs from this key if the item is in a folder that is
1001   already included."""
1002
1003   if key in (KEY_UNTRACKED, KEY_TRACKED, KEY_TOUCHED):
1004     item_configs = set(item_configs)
1005     for (folder, configs) in folders.iteritems():
1006       if folder != item and item.startswith(folder):
1007         item_configs -= configs
1008
1009   return item_configs
1010
1011
def get_folders(values_dict):
  """Returns the subset of |values_dict| whose keys end with '/'."""
  folders = {}
  for item, configs in values_dict.iteritems():
    if item.endswith('/'):
      folders[item] = configs
  return folders
1018
1019
def invert_map(variables):
  """Converts {config: {deptype: list(depvals)}} to
  {deptype: {depval: set(configs)}}.

  The result always has an entry for the three dependency keys, 'command' and
  'read_only'. A command is stored as a single tuple value and read_only as a
  single value, instead of being iterated over.
  """
  dependency_keys = (KEY_TOUCHED, KEY_TRACKED, KEY_UNTRACKED)
  out = dict((key, {}) for key in dependency_keys + ('command', 'read_only'))
  for config, values in variables.iteritems():
    for key in dependency_keys:
      for item in values.get(key, []):
        out[key].setdefault(item, set()).add(config)
    if 'command' in values:
      # Lists are not hashable; the whole command becomes one tuple key.
      whole_command = tuple(values['command'])
      out['command'].setdefault(whole_command, set()).add(config)
    if 'read_only' in values:
      out['read_only'].setdefault(values['read_only'], set()).add(config)
  return out
1044
1045
def reduce_inputs(values):
  """Reduces the output of invert_map() to the strictest minimum list.

  For each item, removes the configs where the item is redundant, either
  because a stronger key already lists it (remove_weak_dependencies()) or
  because a containing folder is already listed
  (remove_repeated_dependencies()). Items left with no config are dropped.

  Returns the minimized dictionary, with an entry for each known key.
  """
  # Folders can only live in KEY_UNTRACKED.
  folders = get_folders(values.get(KEY_UNTRACKED, {}))

  out = {}
  for key in (KEY_TOUCHED, KEY_TRACKED, KEY_UNTRACKED, 'command', 'read_only'):
    kept = {}
    for item, item_configs in values.get(key, {}).iteritems():
      without_weak = remove_weak_dependencies(values, key, item, item_configs)
      reduced = remove_repeated_dependencies(folders, key, item, without_weak)
      if reduced:
        kept[item] = reduced
    out[key] = kept
  return out
1074
1075
def convert_map_to_isolate_dict(values, config_variables):
  """Regenerates a .isolate configuration dict from the files and dirs
  mappings generated by reduce_inputs().

  Items sharing the exact same set of configs are grouped under one condition,
  whose expression is generated by ShortExpressionFinder. Returns
  {'conditions': [[expression, {'variables': {...}}], ...]}, sorted.
  """
  # Gather every configuration mentioned anywhere, for set inversion later.
  mentioned = set()
  for configs_by_item in values.itervalues():
    mentioned.update(*configs_by_item.values())

  # Invert the mapping: frozenset(configs) -> {'variables': {key: ...}}.
  by_configs = {}
  for key, configs_by_item in values.iteritems():
    for item, configs in configs_by_item.iteritems():
      then = by_configs.setdefault(frozenset(configs), {})
      variables = then.setdefault('variables', {})

      if item in (True, False):
        # One-off for read_only.
        variables[key] = item
        continue

      assert item
      if isinstance(item, tuple):
        # One-off for command: the tuple is the whole command. Do not merge
        # lists and do not sort!
        assert key not in variables
        variables[key] = list(item)
        continue

      # A file or a directory. Append it and keep the list sorted.
      entries = variables.setdefault(key, [])
      entries.append(item)
      entries.sort()

  if mentioned:
    # One set of observed values per configuration variable.
    config_values = [set(column) for column in zip(*mentioned)]
    sef = short_expression_finder.ShortExpressionFinder(
        zip(config_variables, config_values))

  # |sef| is only defined when at least one config was mentioned.
  conditions = [
    [sef.get_expr(configs), then] for configs, then in by_configs.iteritems()
  ]
  conditions.sort()
  return {'conditions': conditions}
1119
1120
1121 ### Internal state files.
1122
1123
class ConfigSettings(object):
  """Holds the dependency variables of one build configuration.

  Instances are meant to be treated as immutable; union() returns a new
  object.
  """
  def __init__(self, config, values):
    """Stores |config| and a verified copy of the |values| dictionary."""
    verify_variables(values)
    self.config = config
    # Dependency lists are kept sorted.
    self.touched = sorted(values.get(KEY_TOUCHED, []))
    self.tracked = sorted(values.get(KEY_TRACKED, []))
    self.untracked = sorted(values.get(KEY_UNTRACKED, []))
    # Copied so the caller's list is not shared; the order is kept as is.
    self.command = list(values.get('command', []))
    self.read_only = values.get('read_only')

  def union(self, rhs):
    """Returns a new ConfigSettings merging self with |rhs|.

    When both sides define a config, or a command, they must be equal. self
    has priority for 'read_only' unless it is None.
    """
    if self.config and rhs.config:
      assert self.config == rhs.config
    if self.command and rhs.command:
      assert self.command == rhs.command

    read_only = self.read_only
    if read_only is None:
      read_only = rhs.read_only
    merged = {
      KEY_TOUCHED: sorted(self.touched + rhs.touched),
      KEY_TRACKED: sorted(self.tracked + rhs.tracked),
      KEY_UNTRACKED: sorted(self.untracked + rhs.untracked),
      'command': self.command or rhs.command,
      'read_only': read_only,
    }
    return ConfigSettings(self.config or rhs.config, merged)

  def flatten(self):
    """Returns a 'variables' dictionary, omitting empty or unset entries."""
    candidates = (
      ('command', self.command),
      (KEY_TOUCHED, self.touched),
      (KEY_TRACKED, self.tracked),
      (KEY_UNTRACKED, self.untracked),
    )
    out = dict((name, value) for name, value in candidates if value)
    # False is meaningful for read_only; only None means unset.
    if self.read_only is not None:
      out['read_only'] = self.read_only
    return out
1162
1163
class Configs(object):
  """Represents a processed .isolate file.

  The content is stored split by configuration, one ConfigSettings per
  configuration.
  """
  def __init__(self, file_comment):
    self.file_comment = file_comment
    # by_config is keyed by tuples of values of the configuration variables.
    # The variable names, identical for every key, live in config_variables.
    # Both start out empty since the variables are not known yet;
    # config_variables is set by the first call to union() or
    # merge_dependencies().
    self.by_config = {}
    self.config_variables = ()

  def union(self, rhs):
    """Returns a new Configs holding the variables of both self and |rhs|.

    Raises ExecutionError if both sides define configuration variables and
    they differ.
    """
    mine = self.config_variables
    theirs = rhs.config_variables
    # Merging is impossible with different variables since the default values
    # for the extra ones are unknown. The variables are sorted so permutations
    # are not a concern.
    if mine and theirs and theirs != mine:
      raise ExecutionError(
          'Variables in merged .isolate files do not match: %r and %r' % (
              mine, theirs))

    # Takes the first file comment, preferring self.
    merged = Configs(self.file_comment or rhs.file_comment)
    merged.config_variables = mine or theirs
    for config in set(self.by_config).union(rhs.by_config):
      merged.by_config[config] = union(
          self.by_config.get(config), rhs.by_config.get(config))
    return merged

  def merge_dependencies(self, values, config_variables, configs):
    """Adds new dependencies to this object for the given configurations.

    Arguments:
      values: A variables dict as found in a .isolate file, e.g.,
          {KEY_TOUCHED: [...], 'command': ...}. Nothing happens when empty.
      config_variables: An ordered list of configuration variables, e.g.,
          ["OS", "chromeos"]. If this object already has configuration
          variables, they must match.
      configs: a list of tuples of values of the configuration variables,
          e.g., [("mac", 0), ("linux", 1)]. The dependencies in |values|
          are added to all of these configurations, and other configurations
          are unchanged.
    """
    if not values:
      return

    if self.config_variables:
      # Same constraint as in Configs.union().
      assert self.config_variables == config_variables
    else:
      self.config_variables = config_variables

    for config in configs:
      addition = ConfigSettings(config, values)
      self.by_config[config] = union(self.by_config.get(config), addition)

  def flatten(self):
    """Returns a dictionary mapping each config to its flattened variables."""
    out = {}
    for config, settings in self.by_config.iteritems():
      out[config] = settings.flatten()
    return out

  def make_isolate_file(self):
    """Returns a dictionary suitable for writing to a .isolate file."""
    inverted = invert_map(self.flatten())
    return convert_map_to_isolate_dict(
        reduce_inputs(inverted), self.config_variables)
1241
1242
# TODO(benrg): Remove this function when no old-format files are left.
def convert_old_to_new_format(value):
  """Converts from the old .isolate format, which only has one variable (OS),
  always includes 'linux', 'mac' and 'win' in the set of valid values for OS,
  and allows conditions that depend on the set of all OSes, to the new format,
  which allows any set of variables, has no hardcoded values, and only allows
  explicit positive tests of variable values.

  Arguments:
    value: the dictionary loaded from a .isolate file. It is never modified.

  Returns |value| itself when there is nothing to convert, otherwise a shallow
  copy with 'variables' removed and 'conditions' rewritten.

  Raises AssertionError if conversion is needed and a condition is not of the
  form 'OS=="name"'.
  """
  conditions = value.get('conditions', [])
  if 'variables' not in value and all(len(cond) == 2 for cond in conditions):
    return value  # Nothing to change

  def parse_condition(cond):
    """Returns the OS name tested by an old-format condition."""
    match = re.match(r'OS=="(\w+)"\Z', cond[0])
    assert match, 'Unsupported old-format condition: %r' % (cond[0],)
    return match.group(1)

  oses = set(map(parse_condition, conditions))
  default_oses = set(['linux', 'mac', 'win'])
  oses = sorted(oses | default_oses)

  def if_not_os(not_os, then):
    """Returns a condition applying |then| to every OS except |not_os|."""
    expr = ' or '.join('OS=="%s"' % name for name in oses if name != not_os)
    return [expr, then]

  # The 'then' branches are kept when not empty; each 'else' branch becomes an
  # explicit test on all the other OSes.
  conditions = [
    cond[:2] for cond in conditions if cond[1]
  ] + [
    if_not_os(parse_condition(cond), cond[2])
    for cond in conditions if len(cond) == 3
  ]

  # Copy before removing 'variables' so the caller's dictionary is untouched.
  value = value.copy()
  if 'variables' in value:
    # Unconditional variables apply to every known OS.
    conditions.append(if_not_os(None, {'variables': value.pop('variables')}))
  conditions.sort()

  value['conditions'] = conditions
  return value
1280
1281
def load_isolate_as_config(isolate_dir, value, file_comment):
  """Parses one .isolate file and returns a Configs() instance.

  Arguments:
    isolate_dir: directory against which relative 'includes' are resolved.
    value: the loaded dictionary that was defined in the gyp file.
    file_comment: comment stored in the returned Configs.

  Old-format content is converted first. Included files are loaded
  recursively and merged into the result. An absolute include path raises
  ExecutionError.

  The expected format is strict, anything diverting from the format below will
  throw an assert:
  {
    'includes': [
      'foo.isolate',
    ],
    'conditions': [
      ['OS=="vms" and foo==42', {
        'variables': {
          'command': [
            ...
          ],
          'isolate_dependency_tracked': [
            ...
          ],
          'isolate_dependency_untracked': [
            ...
          ],
          'read_only': False,
        },
      }],
      ...
    ],
  }
  """
  value = convert_old_to_new_format(value)

  variables_and_values = {}
  verify_root(value, variables_and_values)

  config_variables = None
  all_configs = []
  if variables_and_values:
    # Sorted by variable name so the tuples of values have a stable order.
    ordered = sorted(variables_and_values.iteritems())
    config_variables = tuple(name for name, _ in ordered)
    config_values = tuple(values for _, values in ordered)
    # Every combination of the values seen for each variable.
    all_configs = list(itertools.product(*config_values))

  isolate = Configs(file_comment)

  # Add configuration-specific variables.
  for expr, then in value.get('conditions', []):
    matching = match_configs(expr, config_variables, all_configs)
    isolate.merge_dependencies(then['variables'], config_variables, matching)

  # Load the includes.
  for include in value.get('includes', []):
    if os.path.isabs(include):
      raise ExecutionError(
          'Failed to load configuration; absolute include path \'%s\'' %
          include)
    include_path = os.path.normpath(os.path.join(isolate_dir, include))
    with open(include_path, 'r') as f:
      included_value = eval_content(f.read())
    # Nested includes are relative to the included file's own directory.
    included = load_isolate_as_config(
        os.path.dirname(include_path), included_value, None)
    isolate = union(isolate, included)

  return isolate
1346
1347
def load_isolate_for_config(isolate_dir, content, variables):
  """Loads the .isolate file and returns the information unprocessed but
  filtered for the specific configuration.

  Arguments:
    isolate_dir: directory against which the 'includes' are resolved.
    content: the text of the .isolate file.
    variables: dict of variable values; it must define every configuration
        variable used by the .isolate file.

  Returns a tuple (command, dependencies, touched, read_only). The paths in
  |dependencies| and |touched| are fixed to use os.path.sep.

  Raises ExecutionError if a configuration variable is missing from
  |variables| or if no configuration matches the given values.
  """
  # Load the .isolate file, process its conditions, retrieve the command and
  # dependencies.
  isolate = load_isolate_as_config(isolate_dir, eval_content(content), None)
  try:
    config_name = tuple(variables[var] for var in isolate.config_variables)
  except KeyError:
    raise ExecutionError(
        'These configuration variables were missing from the command line: %s' %
        ', '.join(sorted(set(isolate.config_variables) - set(variables))))
  config = isolate.by_config.get(config_name)
  if not config:
    # Configuration values may be integers, so they are formatted before being
    # joined; joining them directly would raise a TypeError here.
    raise ExecutionError(
        'Failed to load configuration for variable \'%s\' for config(s) \'%s\''
        '\nAvailable configs: %s' %
        (', '.join(isolate.config_variables),
        ', '.join('%s' % (value,) for value in config_name),
        ', '.join(str(s) for s in isolate.by_config)))
  # Merge tracked and untracked variables, isolate.py doesn't care about the
  # trackability of the variables, only the build tool does.
  dependencies = [
    f.replace('/', os.path.sep) for f in config.tracked + config.untracked
  ]
  touched = [f.replace('/', os.path.sep) for f in config.touched]
  return config.command, dependencies, touched, config.read_only
1379
1380
def save_isolated(isolated, data):
  """Writes |data| to the |isolated| file through trace_inputs.write_json().

  Returns the list of child isolated files that are included by |isolated|.
  This reference implementation never creates child .isolated files, so the
  list is always empty.
  """
  no_children = []
  trace_inputs.write_json(isolated, data, True)
  return no_children
1391
1392
def chromium_save_isolated(isolated, data, variables, algo):
  """Writes one or many .isolated files.

  Files under test/data/ and, when the PRODUCT_DIR variable is set, files
  whose path starts with it, are each moved out of |data| into their own child
  .isolated file. The hash of each child is appended to data['includes']
  before the master file is saved.

  This slightly increases the cold cache cost but greatly reduces the warm
  cache cost by splitting low-churn files off the master .isolated file. It
  also reduces overall isolateserver memcache consumption.

  Returns the base names of the child files, followed by whatever
  save_isolated() returns.
  """
  children = []

  def extract_into_included_isolated(prefix):
    """Moves the files starting with |prefix| into a new child dict."""
    child = {
      'algo': data['algo'],
      'files': {},
      'os': data['os'],
      'version': data['version'],
    }
    remaining = data['files']
    # Iterate over a snapshot of the keys since entries are removed.
    for path in list(remaining):
      if path.startswith(prefix):
        child['files'][path] = remaining.pop(path)
    if child['files']:
      children.append(child)

  # Split test/data/ in its own .isolated file.
  extract_into_included_isolated(os.path.join('test', 'data', ''))

  # Split everything out of PRODUCT_DIR in its own .isolated file.
  product_dir = variables.get('PRODUCT_DIR')
  if product_dir:
    extract_into_included_isolated(product_dir)

  base = isolated[:-len('.isolated')]
  files = []
  for index, child in enumerate(children):
    child_path = '%s.%d.isolated' % (base, index)
    trace_inputs.write_json(child_path, child, True)
    child_hash = isolateserver.hash_file(child_path, algo)
    data.setdefault('includes', []).append(child_hash)
    files.append(os.path.basename(child_path))

  return files + save_isolated(isolated, data)
1432
1433
class Flattenable(object):
  """Base class for data that can be saved to and loaded from a json file.

  Subclasses list the names of the serialized attributes in MEMBERS.
  """
  MEMBERS = ()

  def flatten(self):
    """Returns a json-serializable version of itself.

    It is a dictionary of the MEMBERS attributes; None entries are skipped.
    """
    out = {}
    for member in self.MEMBERS:
      value = getattr(self, member)
      if value is not None:
        out[member] = value
    return out

  @classmethod
  def load(cls, data, *args, **kwargs):
    """Loads a flattened version.

    The instance is created with cls(*args, **kwargs), then each member found
    in |data| is set on it. |data| itself is not modified. Raises ValueError
    if |data| has a key that is not in MEMBERS.
    """
    pending = data.copy()
    out = cls(*args, **kwargs)
    for member in out.MEMBERS:
      if member not in pending:
        continue
      # Access to a protected member XXX of a client class
      # pylint: disable=W0212
      out._load_member(member, pending.pop(member))
    if pending:
      # Whatever is left was not recognized.
      raise ValueError(
          'Found unexpected entry %s while constructing an object %s' %
            (pending, cls.__name__), pending, cls.__name__)
    return out

  def _load_member(self, member, value):
    """Loads a member into self."""
    setattr(self, member, value)

  @classmethod
  def load_file(cls, filename, *args, **kwargs):
    """Loads the data from a file or returns an empty instance.

    On IOError or ValueError, a warning is logged and a default instance
    created with cls(*args, **kwargs) is returned instead.
    """
    try:
      loaded = cls.load(trace_inputs.read_json(filename), *args, **kwargs)
      logging.debug('Loaded %s(%s)', cls.__name__, filename)
      return loaded
    except (IOError, ValueError) as e:
      # On failure, loads the default instance.
      fallback = cls(*args, **kwargs)
      logging.warning('Failed to load %s: %s', filename, e)
      return fallback
1477
1478
class SavedState(Flattenable):
  """Describes the content of a .state file.

  This file caches the items calculated by this script and is used to increase
  the performance of the script. This file is not loaded by run_isolated.py.
  This file can always be safely removed.

  It is important to note that the 'files' dict keys are using native OS path
  separator instead of '/' used in .isolate file.
  """
  MEMBERS = (
    # Algorithm used to generate the hash. The only supported value is at the
    # time of writing 'sha-1'.
    'algo',
    # Cache of the processed command. This value is saved because .isolated
    # files are never loaded by isolate.py so it's the only way to load the
    # command safely.
    'command',
    # Cache of the files found so the next run can skip hash calculation.
    'files',
    # Path of the original .isolate file. Relative path to isolated_basedir.
    'isolate_file',
    # List of included .isolated files. Used to support/remember 'slave'
    # .isolated files. Relative path to isolated_basedir.
    'child_isolated_files',
    # If the generated directory tree should be read-only.
    'read_only',
    # Relative cwd to use to start the command.
    'relative_cwd',
    # GYP variables used to generate the .isolated file. Variables are saved so
    # a user can use isolate.py after building and the GYP variables are still
    # defined.
    'variables',
    # Version of the file format in format 'major.minor'. Any non-breaking
    # change must update minor. Any breaking change must update major.
    'version',
  )

  def __init__(self, isolated_basedir):
    """Creates an empty SavedState.

    |isolated_basedir| is the directory where the .isolated and .isolated.state
    files are saved. It must be an absolute path to an existing directory.
    """
    super(SavedState, self).__init__()
    assert os.path.isabs(isolated_basedir), isolated_basedir
    assert os.path.isdir(isolated_basedir), isolated_basedir
    self.isolated_basedir = isolated_basedir

    # Default values of the serialized members.
    # The default algorithm used.
    self.algo = isolateserver.SUPPORTED_ALGOS['sha-1']
    self.command = []
    self.files = {}
    self.isolate_file = None
    self.child_isolated_files = []
    self.read_only = None
    self.relative_cwd = None
    self.variables = {'OS': get_flavor()}
    # The current version.
    self.version = '1.0'

  def update(self, isolate_file, variables):
    """Remembers the original .isolate file and merges in the GYP variables.

    |isolate_file| must be absolute and must designate the same file as the
    one already saved, if any.
    """
    assert os.path.isabs(isolate_file)
    # Convert back to a relative path. On Windows, if the isolate and
    # isolated files are on different drives, the path will stay absolute.
    relative = safe_relpath(isolate_file, self.isolated_basedir)

    # The same .isolate file should always be used to generate the .isolated and
    # .isolated.state.
    assert not self.isolate_file or relative == self.isolate_file, (
        relative, self.isolate_file)
    self.isolate_file = relative
    self.variables.update(variables)

  def update_isolated(self, command, infiles, touched, read_only, relative_cwd):
    """Updates the saved state with data necessary to generate a .isolated file.

    The new files in |infiles| are added to self.files dict but their hash is
    not calculated here. Entries of self.files that are in neither |infiles|
    nor |touched| are removed. self.read_only is only updated when |read_only|
    is not None.
    """
    self.command = command
    self.relative_cwd = relative_cwd
    if read_only is not None:
      self.read_only = read_only

    # Add new files; touched files are flagged with 'T'.
    for path in infiles:
      self.files.setdefault(path, {})
    for path in touched:
      self.files.setdefault(path, {})['T'] = True

    # Prune extraneous files that are not a dependency anymore.
    still_needed = set(infiles).union(touched)
    for stale in set(self.files) - still_needed:
      del self.files[stale]

  def to_isolated(self):
    """Creates a .isolated dictionary out of the saved state.

    https://code.google.com/p/swarming/wiki/IsolatedDesign
    """
    # Only these keys of each 'files' entry are exported.
    exported_keys = ('h', 'l', 'm', 's')

    out = {'algo': isolateserver.SUPPORTED_ALGOS_REVERSE[self.algo]}
    out['files'] = {}
    for filepath, data in self.files.iteritems():
      out['files'][filepath] = dict(
          (k, data[k]) for k in exported_keys if k in data)
    out['os'] = self.variables['OS']
    out['version'] = self.version

    # Optional entries are only written when set.
    if self.command:
      out['command'] = self.command
    if self.read_only is not None:
      out['read_only'] = self.read_only
    if self.relative_cwd:
      out['relative_cwd'] = self.relative_cwd
    return out

  @property
  def isolate_filepath(self):
    """Returns the absolute path of self.isolate_file."""
    joined = os.path.join(self.isolated_basedir, self.isolate_file)
    return os.path.normpath(joined)

  # Arguments number differs from overridden method
  @classmethod
  def load(cls, data, isolated_basedir):  # pylint: disable=W0221
    """Special case loading to disallow different OS.

    It is not possible to load a .isolated.state files from a different OS, this
    file is saved in OS-specific format.

    Raises isolateserver.ConfigError on an unknown algo or an unknown or
    unsupported version.
    """
    out = super(SavedState, cls).load(data, isolated_basedir)
    if 'os' in data:
      out.variables['OS'] = data['os']

    # Converts human readable form back into the proper class type.
    algo = data.get('algo', 'sha-1')
    if algo not in isolateserver.SUPPORTED_ALGOS:
      raise isolateserver.ConfigError('Unknown algo \'%s\'' % out.algo)
    out.algo = isolateserver.SUPPORTED_ALGOS[algo]

    # For example, 1.1 is guaranteed to be backward compatible with 1.0 code.
    parsed_version = re.match(r'^(\d+)\.(\d+)$', out.version)
    if not parsed_version:
      raise isolateserver.ConfigError('Unknown version \'%s\'' % out.version)
    # Only the major version 1 is supported.
    if parsed_version.group(1) != '1':
      raise isolateserver.ConfigError(
          'Unsupported version \'%s\'' % out.version)

    # The .isolate file must be valid. It could be absolute on Windows if the
    # drive containing the .isolate and the drive containing the .isolated files
    # differ.
    assert sys.platform == 'win32' or not os.path.isabs(out.isolate_file)
    assert os.path.isfile(out.isolate_filepath), out.isolate_filepath
    return out

  def flatten(self):
    """Makes sure 'algo' is in human readable form."""
    flat = super(SavedState, self).flatten()
    flat['algo'] = isolateserver.SUPPORTED_ALGOS_REVERSE[flat['algo']]
    return flat

  def __str__(self):
    """Returns a multi-line summary; files are only counted, not listed."""
    lines = [
      '%s(' % self.__class__.__name__,
      '  command: %s' % self.command,
      '  files: %d' % len(self.files),
      '  isolate_file: %s' % self.isolate_file,
      '  read_only: %s' % self.read_only,
      '  relative_cwd: %s' % self.relative_cwd,
      '  child_isolated_files: %s' % self.child_isolated_files,
      '  variables: ',
    ]
    # One line per variable, sorted by name.
    lines.extend(
        '    %s=%s' % (k, self.variables[k]) for k in sorted(self.variables))
    return '\n'.join(lines) + ')'
1656
1657
class CompleteState(object):
  """Contains all the state to run the task at hand."""
  def __init__(self, isolated_filepath, saved_state):
    """Stores the .isolated path and the state object to operate on.

    Arguments:
      isolated_filepath: absolute path of the .isolated file, or None when the
                         state is not backed by a file.
      saved_state: state object; its attributes and methods are used by every
                   other method of this class.
    """
    super(CompleteState, self).__init__()
    assert isolated_filepath is None or os.path.isabs(isolated_filepath)
    self.isolated_filepath = isolated_filepath
    # Contains the data to ease developer's use-case but that is not strictly
    # necessary.
    self.saved_state = saved_state

  @classmethod
  def load_files(cls, isolated_filepath):
    """Loads state from disk.

    Arguments:
      isolated_filepath: absolute path of the .isolated file. The state itself
                         is read from the file named by
                         isolatedfile_to_state(isolated_filepath).
    """
    assert os.path.isabs(isolated_filepath), isolated_filepath
    isolated_basedir = os.path.dirname(isolated_filepath)
    return cls(
        isolated_filepath,
        SavedState.load_file(
            isolatedfile_to_state(isolated_filepath), isolated_basedir))

  def load_isolate(self, cwd, isolate_file, variables, ignore_broken_items):
    """Updates self.saved_state with information loaded from a .isolate file.

    Processes the loaded data, deduce root_dir, relative_cwd.

    Arguments:
      cwd: base directory handed to process_variables().
      isolate_file: absolute path of the .isolate file to load.
      variables: variables to process and store in the saved state.
      ignore_broken_items: forwarded to expand_directories_and_symlinks(); when
                           set, touched entries that do not exist on disk are
                           dropped too.

    Raises:
      isolateserver.MappingError: if one of PATH_VARIABLES points outside the
          inferred root directory.
    """
    # Make sure to not depend on os.getcwd().
    assert os.path.isabs(isolate_file), isolate_file
    isolate_file = file_path.get_native_path_case(isolate_file)
    logging.info(
        'CompleteState.load_isolate(%s, %s, %s, %s)',
        cwd, isolate_file, variables, ignore_broken_items)
    relative_base_dir = os.path.dirname(isolate_file)

    # Processes the variables and update the saved state.
    variables = process_variables(cwd, variables, relative_base_dir)
    self.saved_state.update(isolate_file, variables)
    variables = self.saved_state.variables

    with open(isolate_file, 'r') as f:
      # At that point, variables are not replaced yet in command and infiles.
      # infiles may contain directory entries and is in posix style.
      command, infiles, touched, read_only = load_isolate_for_config(
          os.path.dirname(isolate_file), f.read(), variables)
    command = [eval_variables(i, variables) for i in command]
    infiles = [eval_variables(f, variables) for f in infiles]
    touched = [eval_variables(f, variables) for f in touched]
    # root_dir is automatically determined by the deepest root accessed with the
    # form '../../foo/bar'. Note that path variables must be taken in account
    # too, add them as if they were input files.
    path_variables = [variables[v] for v in PATH_VARIABLES if v in variables]
    root_dir = determine_root_dir(
        relative_base_dir, infiles + touched + path_variables)
    # The relative directory is automatically determined by the relative path
    # between root_dir and the directory containing the .isolate file,
    # isolate_base_dir.
    relative_cwd = os.path.relpath(relative_base_dir, root_dir)
    # Now that we know where the root is, check that the PATH_VARIABLES point
    # inside it.
    for i in PATH_VARIABLES:
      if i in variables:
        if not path_starts_with(
            root_dir, os.path.join(relative_base_dir, variables[i])):
          raise isolateserver.MappingError(
              'Path variable %s=%r points outside the inferred root directory'
              ' %s' % (i, variables[i], root_dir))
    # Normalize the files based to root_dir. It is important to keep the
    # trailing os.path.sep at that step.
    infiles = [
      relpath(normpath(os.path.join(relative_base_dir, f)), root_dir)
      for f in infiles
    ]
    touched = [
      relpath(normpath(os.path.join(relative_base_dir, f)), root_dir)
      for f in touched
    ]
    follow_symlinks = variables['OS'] != 'win'
    # Expand the directories by listing each file inside. Up to now, trailing
    # os.path.sep must be kept. Do not expand 'touched'.
    infiles = expand_directories_and_symlinks(
        root_dir,
        infiles,
        lambda x: re.match(r'.*\.(git|svn|pyc)$', x),
        follow_symlinks,
        ignore_broken_items)

    # If we ignore broken items then remove any missing touched items.
    if ignore_broken_items:
      original_touched_count = len(touched)
      # The entries were just made relative to root_dir, so they have to be
      # resolved against it; testing them as-is would silently depend on the
      # current working directory of the process.
      touched = [
        touch for touch in touched
        if os.path.exists(os.path.join(root_dir, touch))
      ]

      removed_count = original_touched_count - len(touched)
      if removed_count:
        logging.info('Removed %d invalid touched entries', removed_count)

    # Finally, update the new data to be able to generate the foo.isolated file,
    # the file that is used by run_isolated.py.
    self.saved_state.update_isolated(
        command, infiles, touched, read_only, relative_cwd)
    logging.debug(self)

  def process_inputs(self, subdir):
    """Updates self.saved_state.files with the files' mode and hash.

    If |subdir| is specified, filters to a subdirectory. The resulting .isolated
    file is tainted.

    See process_input() for more information.
    """
    # sorted() returns a new list, so removing entries from the dictionary
    # while looping is safe.
    for infile in sorted(self.saved_state.files):
      if subdir and not infile.startswith(subdir):
        self.saved_state.files.pop(infile)
      else:
        filepath = os.path.join(self.root_dir, infile)
        self.saved_state.files[infile] = process_input(
            filepath,
            self.saved_state.files[infile],
            self.saved_state.read_only,
            self.saved_state.variables['OS'],
            self.saved_state.algo)

  def save_files(self):
    """Saves self.saved_state and creates a .isolated file."""
    logging.debug('Dumping to %s', self.isolated_filepath)
    self.saved_state.child_isolated_files = chromium_save_isolated(
        self.isolated_filepath,
        self.saved_state.to_isolated(),
        self.saved_state.variables,
        self.saved_state.algo)
    # Entries without a 's' key count as 0 bytes.
    total_bytes = sum(
        i.get('s', 0) for i in self.saved_state.files.itervalues())
    if total_bytes:
      # TODO(maruel): Stats are missing the .isolated files.
      logging.debug('Total size: %d bytes', total_bytes)
    saved_state_file = isolatedfile_to_state(self.isolated_filepath)
    logging.debug('Dumping to %s', saved_state_file)
    trace_inputs.write_json(saved_state_file, self.saved_state.flatten(), True)

  @property
  def root_dir(self):
    """Returns the absolute path of the root_dir to reference the .isolate file
    via relative_cwd.

    So that join(root_dir, relative_cwd, basename(isolate_file)) is equivalent
    to isolate_filepath.

    Raises:
      ExecutionError: if no .isolate file is known, or if the directory of the
          .isolate file does not end with relative_cwd.
    """
    if not self.saved_state.isolate_file:
      raise ExecutionError('Please specify --isolate')
    isolate_dir = os.path.dirname(self.saved_state.isolate_filepath)
    # Special case '.'.
    if self.saved_state.relative_cwd == '.':
      root_dir = isolate_dir
    else:
      if not isolate_dir.endswith(self.saved_state.relative_cwd):
        raise ExecutionError(
            ('Make sure the .isolate file is in the directory that will be '
             'used as the relative directory. It is currently in %s and should '
             'be in %s') % (isolate_dir, self.saved_state.relative_cwd))
      # Walk back to the root directory: strip relative_cwd and the path
      # separator preceding it.
      root_dir = isolate_dir[:-(len(self.saved_state.relative_cwd) + 1)]
    return file_path.get_native_path_case(root_dir)

  @property
  def resultdir(self):
    """Returns the absolute path containing the .isolated file.

    It is usually equivalent to the variable PRODUCT_DIR. Uses the .isolated
    path as the value.
    """
    return os.path.dirname(self.isolated_filepath)

  def __str__(self):
    """Returns a summary made of root_dir and the indented saved state."""
    def indent(data, indent_length):
      """Indents text."""
      spacing = ' ' * indent_length
      return ''.join(spacing + l for l in str(data).splitlines(True))

    out = '%s(\n' % self.__class__.__name__
    out += '  root_dir: %s\n' % self.root_dir
    out += '  saved_state: %s)' % indent(self.saved_state, 2)
    return out
1839
1840
def load_complete_state(options, cwd, subdir, skip_update):
  """Builds a CompleteState out of the .isolated.state and .isolate files.

  The .isolated file itself is never read.

  Arguments:
    options: Options instance generated with OptionParserIsolate. For either
             options.isolate and options.isolated, if the value is set, it is an
             absolute path.
    cwd: base directory to be used when loading the .isolate file.
    subdir: optional argument to only process file in the subdirectory, relative
            to CompleteState.root_dir.
    skip_update: Skip trying to load the .isolate file and processing the
                 dependencies. It is useful when not needed, like when tracing.

  Raises:
    ExecutionError: if no .isolate file can be determined while an update is
        requested, or if options.isolate does not match the saved one.
  """
  assert not options.isolate or os.path.isabs(options.isolate)
  assert not options.isolated or os.path.isabs(options.isolated)
  cwd = file_path.get_native_path_case(unicode(cwd))
  if not options.isolated:
    # Constructs a dummy object that cannot be saved. Useful for temporary
    # commands like 'run'.
    # NOTE(review): SavedState() is built without arguments here while
    # SavedState.load() takes an isolated_basedir; confirm the constructor
    # accepts that.
    complete_state = CompleteState(None, SavedState())
  else:
    # Load the previous state if it was present. Namely, "foo.isolated.state".
    # Note: this call doesn't load the .isolate file.
    complete_state = CompleteState.load_files(options.isolated)

  saved_state = complete_state.saved_state
  if options.isolate:
    isolate = options.isolate
    if saved_state.isolate_file:
      # The command line value must designate the file already saved.
      rel_isolate = safe_relpath(
          options.isolate, saved_state.isolated_basedir)
      if rel_isolate != saved_state.isolate_file:
        raise ExecutionError(
            '%s and %s do not match.' % (
              options.isolate, saved_state.isolate_file))
  elif saved_state.isolate_file:
    isolate = saved_state.isolate_filepath
  else:
    if not skip_update:
      raise ExecutionError('A .isolate file is required.')
    isolate = None

  if not skip_update:
    # Then load the .isolate and expands directories.
    complete_state.load_isolate(
        cwd, isolate, options.variables, options.ignore_broken_items)

  # Regenerate complete_state.saved_state.files. The variables are evaluated
  # only after the .isolate file had a chance to update them.
  if subdir:
    evaluated = eval_variables(
        unicode(subdir), complete_state.saved_state.variables)
    subdir = evaluated.replace('/', os.path.sep)

  if not skip_update:
    complete_state.process_inputs(subdir)
  return complete_state
1900
1901
def read_trace_as_isolate_dict(complete_state, trace_blacklist):
  """Reads a trace and returns the .isolate dictionary.

  Arguments:
    complete_state: CompleteState; its isolated_filepath + '.log' is the trace
                    log that is parsed.
    trace_blacklist: passed through to the log parser and generate_isolate().

  Returns:
    Tuple (value, exceptions): the generated .isolate dictionary and the list
    of 'exception' entries found in the parsed log, so the caller can re-raise
    them.

  Raises:
    ExecutionError: if the log file is missing or if parsing the trace failed.
  """
  api = trace_inputs.get_api()
  logfile = complete_state.isolated_filepath + '.log'
  if not os.path.isfile(logfile):
    raise ExecutionError(
        'No log file \'%s\' to read, did you forget to \'trace\'?' % logfile)
  try:
    data = api.parse_log(logfile, trace_blacklist, None)
    exceptions = [i['exception'] for i in data if 'exception' in i]
    results = (i['results'] for i in data if 'results' in i)
    results_stripped = (i.strip_root(complete_state.root_dir) for i in results)
    # chain.from_iterable() flattens in linear time, whereas sum(lists, [])
    # copies the accumulated list on every addition.
    files = set(itertools.chain.from_iterable(
        result.existent for result in results_stripped))
    tracked, touched = split_touched(files)
    value = generate_isolate(
        tracked,
        [],
        touched,
        complete_state.root_dir,
        complete_state.saved_state.variables,
        complete_state.saved_state.relative_cwd,
        trace_blacklist)
    return value, exceptions
  except trace_inputs.TracingFailure as e:
    raise ExecutionError(
        'Reading traces failed for: %s\n%s' %
          (' '.join(complete_state.saved_state.command), str(e)))
1932
1933
def print_all(comment, data, stream):
  """Writes |comment|, when there is one, then the pretty-printed |data| into
  |stream|.
  """
  has_comment = bool(comment)
  if has_comment:
    stream.write(comment)
  pretty_print(data, stream)
1941
1942
1943 def merge(complete_state, trace_blacklist):
1944   """Reads a trace and merges it back into the source .isolate file."""
1945   value, exceptions = read_trace_as_isolate_dict(
1946       complete_state, trace_blacklist)
1947
1948   # Now take that data and union it into the original .isolate file.
1949   with open(complete_state.saved_state.isolate_filepath, 'r') as f:
1950     prev_content = f.read()
1951   isolate_dir = os.path.dirname(complete_state.saved_state.isolate_filepath)
1952   prev_config = load_isolate_as_config(
1953       isolate_dir,
1954       eval_content(prev_content),
1955       extract_comment(prev_content))
1956   new_config = load_isolate_as_config(isolate_dir, value, '')
1957   config = union(prev_config, new_config)
1958   data = config.make_isolate_file()
1959   print('Updating %s' % complete_state.saved_state.isolate_file)
1960   with open(complete_state.saved_state.isolate_filepath, 'wb') as f:
1961     print_all(config.file_comment, data, f)
1962   if exceptions:
1963     # It got an exception, raise the first one.
1964     raise \
1965         exceptions[0][0], \
1966         exceptions[0][1], \
1967         exceptions[0][2]
1968
1969
1970 ### Commands.
1971
1972
def CMDarchive(parser, args):
  """Creates a .isolated file and uploads the tree to an isolate server.

  All the files listed in the .isolated file are put in the isolate server
  cache via isolateserver.py.
  """
  parser.add_option('--subdir', help='Filters to a subdirectory')
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported argument: %s' % args)

  with tools.Profiler('GenerateHashtable'):
    success = False
    try:
      complete_state = load_complete_state(
          options, os.getcwd(), options.subdir, False)

      def isolated_dir():
        """Directory containing the .isolated file."""
        return os.path.dirname(complete_state.isolated_filepath)

      if not options.outdir:
        options.outdir = os.path.join(isolated_dir(), 'hashtable')
      # Make sure that complete_state isn't modified until save_files() is
      # called, because any changes made to it here will propagate to the files
      # created (which is probably not intended).
      complete_state.save_files()

      saved_state = complete_state.saved_state
      infiles = saved_state.files
      # Add all the .isolated files, the main one first.
      digests = []
      for item in [options.isolated] + saved_state.child_isolated_files:
        item_path = os.path.join(isolated_dir(), item)
        # Do not use isolateserver.hash_file() here because the file is
        # likely smallish (under 500kb) and its file size is needed.
        with open(item_path, 'rb') as f:
          content = f.read()
        digest = saved_state.algo(content).hexdigest()
        digests.append(digest)
        infiles[item_path] = {
          'h': digest,
          's': len(content),
          'priority': '0'
        }

      logging.info('Creating content addressed object store with %d item',
                   len(infiles))

      if not is_url(options.outdir):
        recreate_tree(
            outdir=options.outdir,
            indir=complete_state.root_dir,
            infiles=infiles,
            action=run_isolated.HARDLINK_WITH_FALLBACK,
            as_hash=True)
      else:
        isolateserver.upload_tree(
            base_url=options.outdir,
            indir=complete_state.root_dir,
            infiles=infiles,
            namespace='default-gzip')
      success = True
      print('%s  %s' % (digests[0], os.path.basename(options.isolated)))
    finally:
      # If the command failed, delete the .isolated file if it exists. This is
      # important so no stale swarm job is executed.
      if not success and os.path.isfile(options.isolated):
        os.remove(options.isolated)
  return not success
2043
2044
def CMDcheck(parser, args):
  """Checks that all the inputs are present and generates .isolated."""
  parser.add_option('--subdir', help='Filters to a subdirectory')
  options, leftover = parser.parse_args(args)
  if leftover:
    parser.error('Unsupported argument: %s' % leftover)

  # Loading the state is the whole check; the only other step is to store the
  # result and state.
  load_complete_state(
      options, os.getcwd(), options.subdir, False).save_files()
  return 0
2058
2059
# Exposes CMDarchive under a second name; both names refer to the same function
# object. NOTE(review): presumably kept so 'hashtable' stays usable as a
# command name; confirm against the subcommand dispatcher.
CMDhashtable = CMDarchive
2061
2062
def CMDmerge(parser, args):
  """Reads and merges the data from the trace back into the original .isolate.

  Ignores --outdir.
  """
  parser.require_isolated = False
  add_trace_option(parser)
  options, leftover = parser.parse_args(args)
  if leftover:
    parser.error('Unsupported argument: %s' % leftover)

  # The state is loaded before the blacklist is generated, then both are
  # handed to merge().
  state = load_complete_state(options, os.getcwd(), None, False)
  merge(state, trace_inputs.gen_blacklist(options.trace_blacklist))
  return 0
2078
2079
2080 def CMDread(parser, args):
2081   """Reads the trace file generated with command 'trace'.
2082
2083   Ignores --outdir.
2084   """
2085   parser.require_isolated = False
2086   add_trace_option(parser)
2087   parser.add_option(
2088       '--skip-refresh', action='store_true',
2089       help='Skip reading .isolate file and do not refresh the hash of '
2090            'dependencies')
2091   parser.add_option(
2092       '-m', '--merge', action='store_true',
2093       help='merge the results back in the .isolate file instead of printing')
2094   options, args = parser.parse_args(args)
2095   if args:
2096     parser.error('Unsupported argument: %s' % args)
2097
2098   complete_state = load_complete_state(
2099       options, os.getcwd(), None, options.skip_refresh)
2100   blacklist = trace_inputs.gen_blacklist(options.trace_blacklist)
2101   value, exceptions = read_trace_as_isolate_dict(complete_state, blacklist)
2102   if options.merge:
2103     merge(complete_state, blacklist)
2104   else:
2105     pretty_print(value, sys.stdout)
2106
2107   if exceptions:
2108     # It got an exception, raise the first one.
2109     raise \
2110         exceptions[0][0], \
2111         exceptions[0][1], \
2112         exceptions[0][2]
2113   return 0
2114
2115
def CMDremap(parser, args):
  """Creates a directory with all the dependencies mapped into it.

  Useful to test manually why a test is failing. The target executable is not
  run.
  """
  parser.require_isolated = False
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported argument: %s' % args)
  complete_state = load_complete_state(options, os.getcwd(), None, False)

  if options.outdir:
    # A user supplied directory must be local; it is created on demand.
    if is_url(options.outdir):
      parser.error('Can\'t use url for --outdir with mode remap.')
    if not os.path.isdir(options.outdir):
      os.makedirs(options.outdir)
  else:
    options.outdir = run_isolated.make_temp_dir(
        'isolate', complete_state.root_dir)
  print('Remapping into %s' % options.outdir)
  if os.listdir(options.outdir):
    raise ExecutionError('Can\'t remap in a non-empty directory')

  saved_state = complete_state.saved_state
  recreate_tree(
      outdir=options.outdir,
      indir=complete_state.root_dir,
      infiles=saved_state.files,
      action=run_isolated.HARDLINK_WITH_FALLBACK,
      as_hash=False)
  if saved_state.read_only:
    run_isolated.make_writable(options.outdir, True)

  # Only a state backed by a .isolated file can be saved.
  if complete_state.isolated_filepath:
    complete_state.save_files()
  return 0
2151
2152
def CMDrewrite(parser, args):
  """Rewrites a .isolate file into the canonical format."""
  parser.require_isolated = False
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported argument: %s' % args)

  isolate = options.isolate
  if options.isolated:
    # Load the previous state if it was present. Namely, "foo.isolated.state".
    # It is used as a fallback to locate the .isolate file.
    complete_state = CompleteState.load_files(options.isolated)
    isolate = options.isolate or complete_state.saved_state.isolate_filepath
  if not isolate:
    parser.error('--isolate is required.')

  with open(isolate, 'r') as f:
    content = f.read()
  isolate_dir = os.path.dirname(os.path.abspath(isolate))
  config = load_isolate_as_config(
      isolate_dir, eval_content(content), extract_comment(content))
  data = config.make_isolate_file()
  print('Updating %s' % isolate)
  # The file is overwritten in place with the regenerated content.
  with open(isolate, 'wb') as f:
    print_all(config.file_comment, data, f)
  return 0
2180
2181
@subcommand.usage('-- [extra arguments]')
def CMDrun(parser, args):
  """Runs the test executable in an isolated (temporary) directory.

  All the dependencies are mapped into the temporary directory and the
  directory is cleaned up after the target exits. Warning: if --outdir is
  specified, it is deleted upon exit.

  Argument processing stops at -- and these arguments are appended to the
  command line of the target to run. For example, use:
    isolate.py run --isolated foo.isolated -- --gtest_filter=Foo.Bar
  """
  parser.require_isolated = False
  parser.add_option(
      '--skip-refresh', action='store_true',
      help='Skip reading .isolate file and do not refresh the hash of '
           'dependencies')
  options, args = parser.parse_args(args)
  if options.outdir and is_url(options.outdir):
    parser.error('Can\'t use url for --outdir with mode run.')

  complete_state = load_complete_state(
      options, os.getcwd(), None, options.skip_refresh)
  saved_state = complete_state.saved_state
  # Whatever is left on the command line is appended to the saved command.
  cmd = saved_state.command + args
  if not cmd:
    raise ExecutionError('No command to run.')

  cmd = tools.fix_python_path(cmd)
  try:
    root_dir = complete_state.root_dir
    if options.outdir:
      if not os.path.isdir(options.outdir):
        os.makedirs(options.outdir)
    else:
      if not os.path.isabs(root_dir):
        root_dir = os.path.join(os.path.dirname(options.isolated), root_dir)
      options.outdir = run_isolated.make_temp_dir('isolate', root_dir)
    recreate_tree(
        outdir=options.outdir,
        indir=root_dir,
        infiles=saved_state.files,
        action=run_isolated.HARDLINK_WITH_FALLBACK,
        as_hash=False)
    cwd = os.path.normpath(
        os.path.join(options.outdir, saved_state.relative_cwd))
    if not os.path.isdir(cwd):
      # It can happen when no files are mapped from the directory containing the
      # .isolate file. But the directory must exist to be the current working
      # directory.
      os.makedirs(cwd)
    if saved_state.read_only:
      run_isolated.make_writable(options.outdir, True)
    logging.info('Running %s, cwd=%s' % (cmd, cwd))
    result = subprocess.call(cmd, cwd=cwd)
  finally:
    # The output directory is removed whether or not the run succeeded.
    if options.outdir:
      run_isolated.rmtree(options.outdir)

  if complete_state.isolated_filepath:
    complete_state.save_files()
  return result
2243
2244
@subcommand.usage('-- [extra arguments]')
def CMDtrace(parser, args):
  """Traces the target using trace_inputs.py.

  It runs the executable without remapping it, and traces all the files it and
  its child processes access. Then the 'merge' command can be used to generate
  an updated .isolate file out of it or the 'read' command to print it out to
  stdout.

  Argument processing stops at -- and these arguments are appended to the
  command line of the target to run. For example, use:
    isolate.py trace --isolated foo.isolated -- --gtest_filter=Foo.Bar
  """
  add_trace_option(parser)
  parser.add_option(
      '-m', '--merge', action='store_true',
      help='After tracing, merge the results back in the .isolate file')
  parser.add_option(
      '--skip-refresh', action='store_true',
      help='Skip reading .isolate file and do not refresh the hash of '
           'dependencies')
  options, args = parser.parse_args(args)

  complete_state = load_complete_state(
      options, os.getcwd(), None, options.skip_refresh)
  # Whatever is left on the command line is appended to the saved command.
  cmd = complete_state.saved_state.command + args
  if not cmd:
    raise ExecutionError('No command to run.')
  cmd = tools.fix_python_path(cmd)
  # The target runs in place, from the directory it would be mapped at.
  cwd = os.path.normpath(os.path.join(
      unicode(complete_state.root_dir),
      complete_state.saved_state.relative_cwd))
  cmd[0] = os.path.normpath(os.path.join(cwd, cmd[0]))
  if not os.path.isfile(cmd[0]):
    raise ExecutionError(
        'Tracing failed for: %s\nIt doesn\'t exist' % ' '.join(cmd))
  logging.info('Running %s, cwd=%s', cmd, cwd)
  api = trace_inputs.get_api()
  logfile = complete_state.isolated_filepath + '.log'
  # Remove any previous trace before generating a new one.
  api.clean_trace(logfile)
  out = None
  try:
    with api.get_tracer(logfile) as tracer:
      result, out = tracer.trace(
          cmd,
          cwd,
          'default',
          True)
  except trace_inputs.TracingFailure as e:
    raise ExecutionError('Tracing failed for: %s\n%s' % (' '.join(cmd), str(e)))

  if result:
    logging.error(
        'Tracer exited with %d, which means the tests probably failed so the '
        'trace is probably incomplete.', result)
    logging.info(out)

  complete_state.save_files()

  if options.merge:
    blacklist = trace_inputs.gen_blacklist(options.trace_blacklist)
    merge(complete_state, blacklist)

  # The exit code of the traced process is the exit code of the command.
  return result
2309
2310
def _process_variable_arg(_option, _opt, _value, parser):
  """optparse callback consuming either 'FOO=BAR' or 'FOO BAR' from the
  remaining arguments and appending (name, value) to parser.values.variables.

  Raises:
    optparse.OptionValueError: if the remaining arguments run out.
  """
  usage = 'Please use --variable FOO=BAR or --variable FOO BAR'
  if not parser.rargs:
    raise optparse.OptionValueError(usage)
  name = parser.rargs.pop(0)
  if '=' not in name:
    # Two-arguments form, the value is the next argument.
    if not parser.rargs:
      raise optparse.OptionValueError(usage)
    value = parser.rargs.pop(0)
    parser.values.variables.append((name, value))
    return
  # Single-argument form, only the first '=' separates name and value.
  parser.values.variables.append(tuple(name.split('=', 1)))
2324
2325
def add_variable_option(parser):
  """Registers --isolated (plus its hidden --result spelling) and --variable
  on |parser|.
  """
  parser.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='.isolated file to generate or read')
  # Keep for compatibility. TODO(maruel): Remove once not used anymore.
  parser.add_option(
      '-r', '--result',
      dest='isolated',
      help=optparse.SUPPRESS_HELP)

  # Variables that are set even when none is given on the command line.
  flavor = get_flavor()
  exe_suffix = '.exe' if sys.platform in ('win32', 'cygwin') else ''
  default_variables = [('OS', flavor), ('EXECUTABLE_SUFFIX', exe_suffix)]
  variable_help = (
      'Variables to process in the .isolate file, default: %default. '
      'Variables are persistent accross calls, they are saved inside '
      '<.isolated>.state')
  parser.add_option(
      '-V', '--variable',
      action='callback',
      callback=_process_variable_arg,
      default=default_variables,
      dest='variables',
      metavar='FOO BAR',
      help=variable_help)
2352
2353
def add_trace_option(parser):
  """Registers --trace-blacklist on |parser|, defaulting to a copy of
  DEFAULT_BLACKLIST.
  """
  blacklist_help = (
      'List of regexp to use as blacklist filter for files to consider '
      'important, not to be confused with --blacklist which blacklists '
      'test case.')
  # A new list is made so that appended values do not end up in the constant.
  parser.add_option(
      '--trace-blacklist',
      action='append',
      default=list(DEFAULT_BLACKLIST),
      help=blacklist_help)
2362
2363
def parse_isolated_option(parser, options, cwd, require_isolated):
  """Normalizes and validates options.isolated.

  A given value is converted to native separators, joined onto |cwd| and
  normalized; it must end with '.isolated'. A missing value is reported
  through parser.error() only when |require_isolated| is set.
  """
  if options.isolated:
    native = options.isolated.replace('/', os.path.sep)
    options.isolated = os.path.normpath(os.path.join(cwd, native))
    if not options.isolated.endswith('.isolated'):
      parser.error('--isolated value must end with \'.isolated\'')
  elif require_isolated:
    parser.error('--isolated is required.')
2373
2374
def parse_variable_option(options):
  """Replaces the list of (name, value) pairs in options.variables by a dict.

  Values that parse as an integer are stored as int, the other ones are decoded
  from utf-8. When a name is given more than once, the last value wins.
  """
  # TODO(benrg): Maybe we should use a copy of gyp's NameValueListToDict here,
  # but it wouldn't be backward compatible.
  def try_make_int(s):
    """Converts a value to int if possible, converts to unicode otherwise."""
    try:
      return int(s)
    except ValueError:
      return s.decode('utf-8')

  converted = {}
  for name, raw_value in options.variables:
    converted[name] = try_make_int(raw_value)
  options.variables = converted
2386
2387
class OptionParserIsolate(tools.OptionParserWithLogging):
  """Option parser shared by the isolate commands.

  Declares a "Common options" group (--isolate, the options registered by
  add_variable_option(), --outdir and --ignore_broken_items) and post-processes
  the isolated, variables, isolate and outdir values in parse_args().
  """
  # Whether a missing --isolated value is a parse error. Set it to False when
  # the flag can be passed on but must not be mandatory.
  require_isolated = True

  def __init__(self, **kwargs):
    """Builds the parser; ISOLATE_DEBUG seeds the 'verbose' argument."""
    debug_level = int(os.environ.get('ISOLATE_DEBUG', 0))
    tools.OptionParserWithLogging.__init__(
        self, verbose=debug_level, **kwargs)

    common = optparse.OptionGroup(self, "Common options")
    common.add_option(
        '-i', '--isolate',
        metavar='FILE',
        help='.isolate file to load the dependency data from')
    add_variable_option(common)
    common.add_option(
        '-o', '--outdir',
        metavar='DIR',
        help='Directory used to recreate the tree or store the hash table. '
             'Defaults: run|remap: a /tmp subdirectory, others: '
             'defaults to the directory containing --isolated')
    # Any non-empty value of the environment variable turns the flag on by
    # default.
    ignore_broken_default = bool(os.environ.get('ISOLATE_IGNORE_BROKEN_ITEMS'))
    common.add_option(
        '--ignore_broken_items',
        action='store_true',
        default=ignore_broken_default,
        help='Indicates that invalid entries in the isolated file to be '
             'only be logged and not stop processing. Defaults to True if '
             'env var ISOLATE_IGNORE_BROKEN_ITEMS is set')
    self.add_option_group(common)

  def parse_args(self, *args, **kwargs):
    """Parses the command line, then normalizes the path-like options.

    Relative paths are resolved against the current working directory and '/'
    is converted to the native separator, since forward and back slashes are
    often mixed together in a path on Windows.

    Returns:
      The (options, args) pair produced by the base class parser, with the
      isolated, variables, isolate and outdir values post-processed.
    """
    options, leftover = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    if leftover and not self.allow_interspersed_args:
      self.error('Unsupported argument: %s' % leftover)

    cwd = file_path.get_native_path_case(unicode(os.getcwd()))

    def absolute_native(path):
      """Returns |path| as a normalized unicode path anchored at |cwd|."""
      native = unicode(path).replace('/', os.path.sep)
      return os.path.normpath(os.path.join(cwd, native))

    parse_isolated_option(self, options, cwd, self.require_isolated)
    parse_variable_option(options)

    if options.isolate:
      # TODO(maruel): Work with non-ASCII.
      # The path must be in native path case for tracing purposes.
      options.isolate = file_path.get_native_path_case(
          absolute_native(options.isolate))

    if options.outdir and not is_url(options.outdir):
      # outdir doesn't need native path case since tracing is never done from
      # there.
      options.outdir = absolute_native(options.outdir)

    return options, leftover
2446
2447
def main(argv):
  """Dispatches |argv| to the matching command of this module.

  Args:
    argv: command line arguments, without the program name.

  Returns:
    Whatever the dispatcher returns, or 1 after printing the message of an
    ExecutionError, isolateserver.ConfigError or isolateserver.MappingError to
    stderr.
  """
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    parser = OptionParserIsolate(version=__version__)
    return dispatcher.execute(parser, argv)
  except (
      ExecutionError,
      isolateserver.ConfigError,
      isolateserver.MappingError) as error:
    # Expected failures are reported as a short message, not a stack trace.
    emit = sys.stderr.write
    emit('\nError: ')
    emit(str(error))
    emit('\n')
    return 1
2460
2461
if __name__ == '__main__':
  # Process-wide setup that only happens when this file is run as a script,
  # before any command is dispatched.
  # NOTE(review): presumably these fix the console encoding, make the standard
  # streams unbuffered and enable colored output -- confirm in the helpers.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  # The command line minus the program name goes to main(); its return value
  # becomes the process exit status.
  sys.exit(main(sys.argv[1:]))