1 # Copyright 2014 The Swarming Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 that
3 # can be found in the LICENSE file.
5 """Contains logic to parse .isolate files.
7 This module doesn't touch the file system. It's the job of the client code to do
8 I/O on behalf of this module.
10 See more information at
11 https://code.google.com/p/swarming/wiki/IsolateDesign
12 https://code.google.com/p/swarming/wiki/IsolateUserGuide
25 from utils import short_expression_finder
27 # Files that should be 0-length when mapped.
28 KEY_TOUCHED = 'isolate_dependency_touched'
29 # Files that should be tracked by the build tool.
30 KEY_TRACKED = 'isolate_dependency_tracked'
31 # Files that should not be tracked by the build tool.
32 KEY_UNTRACKED = 'isolate_dependency_untracked'
34 # Valid variable name.
35 VALID_VARIABLE = '[A-Za-z_][A-Za-z_0-9]*'
38 def determine_root_dir(relative_root, infiles):
39 """For a list of infiles, determines the deepest root directory that is
40 referenced indirectly.
42 All arguments must be using os.path.sep.
44 # The trick used to determine the root directory is to look at "how far" back
45 # up it is looking up.
46 deepest_root = relative_root
49 while i.startswith('..' + os.path.sep):
51 assert not i.startswith(os.path.sep)
52 x = os.path.dirname(x)
53 if deepest_root.startswith(x):
56 'determine_root_dir(%s, %d files) -> %s',
57 relative_root, len(infiles), deepest_root)
61 def replace_variable(part, variables):
62 m = re.match(r'<\((' + VALID_VARIABLE + ')\)', part)
64 if m.group(1) not in variables:
65 raise isolateserver.ConfigError(
66 'Variable "%s" was not found in %s.\nDid you forget to specify '
67 '--path-variable?' % (m.group(1), variables))
68 return variables[m.group(1)]
72 def eval_variables(item, variables):
73 """Replaces the .isolate variables in a string item.
75 Note that the .isolate format is a subset of the .gyp dialect.
78 replace_variable(p, variables)
79 for p in re.split(r'(<\(' + VALID_VARIABLE + '\))', item))
82 def split_touched(files):
83 """Splits files that are touched vs files that are read."""
91 return tracked, touched
94 def pretty_print(variables, stdout):
95 """Outputs a .isolate file from the decoded variables.
97 The .isolate format is GYP compatible.
99 Similar to pprint.print() but with NIH syndrome.
101 # Order the dictionary keys by these keys in priority.
103 'variables', 'condition', 'command', 'read_only',
104 KEY_TRACKED, KEY_UNTRACKED)
107 """Gives priority to 'most important' keys before the others."""
109 return str(ORDER.index(x))
112 def loop_list(indent, items):
114 if isinstance(item, basestring):
115 stdout.write('%s\'%s\',\n' % (indent, item))
116 elif isinstance(item, dict):
117 stdout.write('%s{\n' % indent)
118 loop_dict(indent + ' ', item)
119 stdout.write('%s},\n' % indent)
120 elif isinstance(item, list):
121 # A list inside a list will write the first item embedded.
122 stdout.write('%s[' % indent)
123 for index, i in enumerate(item):
124 if isinstance(i, basestring):
126 '\'%s\', ' % i.replace('\\', '\\\\').replace('\'', '\\\''))
127 elif isinstance(i, dict):
129 loop_dict(indent + ' ', i)
130 if index != len(item) - 1:
134 stdout.write('%s}%s' % (indent, x))
141 def loop_dict(indent, items):
142 for key in sorted(items, key=sorting_key):
144 stdout.write("%s'%s': " % (indent, key))
145 if isinstance(item, dict):
147 loop_dict(indent + ' ', item)
148 stdout.write(indent + '},\n')
149 elif isinstance(item, list):
151 loop_list(indent + ' ', item)
152 stdout.write(indent + '],\n')
153 elif isinstance(item, basestring):
155 '\'%s\',\n' % item.replace('\\', '\\\\').replace('\'', '\\\''))
156 elif isinstance(item, (int, bool)) or item is None:
157 stdout.write('%s,\n' % item)
162 loop_dict(' ', variables)
166 def print_all(comment, data, stream):
167 """Prints a complete .isolate file and its top-level file comment into a
171 stream.write(comment)
172 pretty_print(data, stream)
175 def extract_comment(content):
176 """Extracts file level comment."""
178 for line in content.splitlines(True):
179 if line.startswith('#'):
186 def eval_content(content):
187 """Evaluates a python file and return the value defined in it.
189 Used in practice for .isolate files.
191 globs = {'__builtins__': None}
194 value = eval(content, globs, locs)
195 except TypeError as e:
196 e.args = list(e.args) + [content]
198 assert locs == {}, locs
199 assert globs == {'__builtins__': None}, globs
203 def match_configs(expr, config_variables, all_configs):
204 """Returns the list of values from |values| that match the condition |expr|.
207 expr: string that is evaluatable with eval(). It is a GYP condition.
208 config_variables: list of the name of the variables.
209 all_configs: list of the list of possible values.
211 If a variable is not referenced at all, it is marked as unbounded (free) with
214 # It is more than just eval'ing the variable, it needs to be double checked to
215 # see if the variable is referenced at all. If not, the variable is free
217 # TODO(maruel): Use the intelligent way by inspecting expr instead of doing
218 # trial and error to figure out which variable is bound.
220 for bound_variables in itertools.product(
221 (True, False), repeat=len(config_variables)):
222 # Add the combination of variables bound.
225 [c for c, b in zip(config_variables, bound_variables) if b],
227 tuple(v if b else None for v, b in zip(line, bound_variables))
228 for line in all_configs)
232 for variables, configs in combinations:
233 # Strip variables and see if expr can still be evaluated.
234 for values in configs:
235 globs = {'__builtins__': None}
236 globs.update(zip(variables, (v for v in values if v is not None)))
238 assertion = eval(expr, globs, {})
241 if not isinstance(assertion, bool):
242 raise isolateserver.ConfigError('Invalid condition')
248 def verify_variables(variables):
249 """Verifies the |variables| dictionary is in the expected format."""
257 assert isinstance(variables, dict), variables
258 assert set(VALID_VARIABLES).issuperset(set(variables)), variables.keys()
259 for name, value in variables.iteritems():
260 if name == 'read_only':
261 assert value in (0, 1, 2, None), value
263 assert isinstance(value, list), value
264 assert all(isinstance(i, basestring) for i in value), value
def verify_ast(expr, variables_and_values):
    """Verifies that |expr| is of the form
      expr ::= expr ( "or" | "and" ) expr
             | identifier "==" ( string | int )

    Also collects the variable identifiers and string/int values in the dict
    |variables_and_values|, in the form {'var': set([val1, val2, ...]), ...}.

    Arguments:
      expr: an ast node, e.g. the .body of a condition compiled in 'eval' mode
          with ast.PyCF_ONLY_AST.
      variables_and_values: dict that is updated in place.

    Raises:
      AssertionError if the expression is not of the supported form.
    """
    assert isinstance(expr, (ast.BoolOp, ast.Compare))
    if isinstance(expr, ast.BoolOp):
        assert isinstance(expr.op, (ast.And, ast.Or))
        for subexpr in expr.values:
            verify_ast(subexpr, variables_and_values)
        return

    # From here on |expr| is an ast.Compare: identifier == literal.
    assert isinstance(expr.left.ctx, ast.Load)
    assert len(expr.ops) == 1
    assert isinstance(expr.ops[0], ast.Eq)
    var_values = variables_and_values.setdefault(expr.left.id, set())
    rhs = expr.comparators[0]

    # Python 3.8+ parses every literal as ast.Constant; ast.Str and ast.Num are
    # deprecated aliases there and are removed in Python 3.12. Older
    # interpreters (including Python 2) only produce ast.Str / ast.Num.
    constant_type = getattr(ast, 'Constant', None)
    if constant_type is not None and isinstance(rhs, constant_type):
        literal = rhs.value
        # Same acceptance set as ast.Str / ast.Num: text and numbers, no bool.
        assert isinstance(literal, (str, int, float, complex)), literal
        assert not isinstance(literal, bool), literal
    else:
        str_type = getattr(ast, 'Str', None)
        num_type = getattr(ast, 'Num', None)
        legacy_types = tuple(t for t in (str_type, num_type) if t is not None)
        assert isinstance(rhs, legacy_types)
        if num_type is not None and isinstance(rhs, num_type):
            literal = rhs.n
        else:
            literal = rhs.s
    var_values.add(literal)
289 def verify_condition(condition, variables_and_values):
290 """Verifies the |condition| dictionary is in the expected format.
291 See verify_ast() for the meaning of |variables_and_values|.
293 VALID_INSIDE_CONDITION = ['variables']
294 assert isinstance(condition, list), condition
295 assert len(condition) == 2, condition
296 expr, then = condition
298 test_ast = compile(expr, '<condition>', 'eval', ast.PyCF_ONLY_AST)
299 verify_ast(test_ast.body, variables_and_values)
301 assert isinstance(then, dict), then
302 assert set(VALID_INSIDE_CONDITION).issuperset(set(then)), then.keys()
303 if not 'variables' in then:
304 raise isolateserver.ConfigError('Missing \'variables\' in condition %s' %
306 verify_variables(then['variables'])
def verify_root(value, variables_and_values):
    """Checks that |value| is the parsed form of a valid .isolate file.

    Only the root keys 'includes', 'conditions' and 'variables' are accepted.
    Each condition is handed to verify_condition() and the root 'variables'
    to verify_variables().

    See verify_ast() for the meaning of |variables_and_values|.

    Raises:
      AssertionError when the structure does not have the expected shape.
    """
    allowed_roots = set(['includes', 'conditions', 'variables'])
    assert isinstance(value, dict), value
    assert allowed_roots.issuperset(set(value)), value.keys()

    # 'includes' must be a list of strings.
    included_paths = value.get('includes', [])
    assert isinstance(included_paths, list), included_paths
    for included_path in included_paths:
        assert isinstance(included_path, basestring), included_path

    # 'conditions' must be a list; each entry is validated on its own.
    condition_list = value.get('conditions', [])
    assert isinstance(condition_list, list), condition_list
    for entry in condition_list:
        verify_condition(entry, variables_and_values)

    # Global variables, validated last.
    verify_variables(value.get('variables', {}))
332 def remove_weak_dependencies(values, key, item, item_configs):
333 """Removes any configs from this key if the item is already under a
336 if key == KEY_TOUCHED:
337 item_configs = set(item_configs)
338 for stronger_key in (KEY_TRACKED, KEY_UNTRACKED):
340 item_configs -= values[stronger_key][item]
347 def remove_repeated_dependencies(folders, key, item, item_configs):
348 """Removes any configs from this key if the item is in a folder that is
351 if key in (KEY_UNTRACKED, KEY_TRACKED, KEY_TOUCHED):
352 item_configs = set(item_configs)
353 for (folder, configs) in folders.iteritems():
354 if folder != item and item.startswith(folder):
355 item_configs -= configs
360 def get_folders(values_dict):
361 """Returns a dict of all the folders in the given values_dict."""
363 (item, configs) for (item, configs) in values_dict.iteritems()
364 if item.endswith('/')
368 def invert_map(variables):
369 """Converts {config: {deptype: list(depvals)}} to
370 {deptype: {depval: set(configs)}}.
379 out = dict((key, {}) for key in KEYS)
380 for config, values in variables.iteritems():
383 items = [tuple(values[key])] if key in values else []
384 elif key == 'read_only':
385 items = [values[key]] if key in values else []
387 assert key in (KEY_TOUCHED, KEY_TRACKED, KEY_UNTRACKED)
388 items = values.get(key, [])
390 out[key].setdefault(item, set()).add(config)
394 def reduce_inputs(values):
395 """Reduces the output of invert_map() to the strictest minimum list.
397 Looks at each individual file and directory, maps where they are used and
398 reconstructs the inverse dictionary.
400 Returns the minimized dictionary.
410 # Folders can only live in KEY_UNTRACKED.
411 folders = get_folders(values.get(KEY_UNTRACKED, {}))
413 out = dict((key, {}) for key in KEYS)
415 for item, item_configs in values.get(key, {}).iteritems():
416 item_configs = remove_weak_dependencies(values, key, item, item_configs)
417 item_configs = remove_repeated_dependencies(
418 folders, key, item, item_configs)
420 out[key][item] = item_configs
424 def convert_map_to_isolate_dict(values, config_variables):
425 """Regenerates back a .isolate configuration dict from files and dirs
426 mappings generated from reduce_inputs().
428 # Gather a list of configurations for set inversion later.
429 all_mentioned_configs = set()
430 for configs_by_item in values.itervalues():
431 for configs in configs_by_item.itervalues():
432 all_mentioned_configs.update(configs)
434 # Invert the mapping to make it dict first.
437 for item, configs in values[key].iteritems():
438 then = conditions.setdefault(frozenset(configs), {})
439 variables = then.setdefault('variables', {})
441 if key == 'read_only':
442 if not isinstance(item, int):
443 raise isolateserver.ConfigError(
444 'Unexpected entry type %r for key %s' % (item, key))
445 variables[key] = item
446 elif key == 'command':
447 if not isinstance(item, tuple):
448 raise isolateserver.ConfigError(
449 'Unexpected entry type %r for key %s' % (item, key))
451 raise isolateserver.ConfigError('Unexpected duplicate key %s' % key)
453 raise isolateserver.ConfigError(
454 'Expected non empty entry in %s' % key)
455 variables[key] = list(item)
456 elif key in (KEY_TOUCHED, KEY_TRACKED, KEY_UNTRACKED):
457 if not isinstance(item, basestring):
458 raise isolateserver.ConfigError('Unexpected entry type %r' % item)
460 raise isolateserver.ConfigError(
461 'Expected non empty entry in %s' % key)
462 # The list of items (files or dirs). Append the new item and keep
464 l = variables.setdefault(key, [])
468 raise isolateserver.ConfigError('Unexpected key %s' % key)
470 if all_mentioned_configs:
471 # Change [(1, 2), (3, 4)] to [set(1, 3), set(2, 4)]
472 config_values = map(set, zip(*all_mentioned_configs))
473 for i in config_values:
475 sef = short_expression_finder.ShortExpressionFinder(
476 zip(config_variables, config_values))
477 conditions = sorted([sef.get_expr(c), v] for c, v in conditions.iteritems())
480 out = {'conditions': conditions}
483 # Extract the global.
490 class ConfigSettings(object):
491 """Represents the dependency variables for a single build configuration.
493 The structure is immutable.
495 .touched, .tracked and .untracked are the list of dependencies. The items in
496 these lists use '/' as a path separator.
497 .command and .isolate_dir describe how to run the command. .isolate_dir uses
498 the OS' native path separator. It must be an absolute path, it's the path
499 where to start the command from.
500 .read_only describes how to map the files.
def __init__(self, values, isolate_dir):
    """Builds the settings from a verified |values| dictionary.

    Arguments:
      values: dict accepted by verify_variables(). It must be empty when
          |isolate_dir| is None.
      isolate_dir: None for the empty object, otherwise an absolute path.
    """
    verify_variables(values)
    if isolate_dir is not None:
        # A non-empty object must be anchored to an absolute path.
        assert os.path.isabs(isolate_dir), isolate_dir
    else:
        # The empty object carries no values at all.
        assert values == {}, values
    # The dependency lists are stored as sorted copies.
    self.touched = sorted(values.get(KEY_TOUCHED, []))
    self.tracked = sorted(values.get(KEY_TRACKED, []))
    self.untracked = sorted(values.get(KEY_UNTRACKED, []))
    # Slice so the stored command is a copy, not the caller's object.
    command = values.get('command', [])
    self.command = command[:]
    self.isolate_dir = isolate_dir
    self.read_only = values.get('read_only')
517 def union(self, rhs):
518 """Merges two config settings together into a new instance.
520 A new instance is not created and self or rhs is returned if the other
521 object is the empty object.
523 self has priority over rhs for .command. Use the same .isolate_dir as the
524 one having a .command.
526 Dependencies listed in rhs are path adjusted ONLY if they don't start with
527 a path variable, e.g. the characters '<('.
529 # When an object has .isolate_dir == None, it means it is the empty object.
530 if rhs.isolate_dir is None:
532 if self.isolate_dir is None:
535 if sys.platform == 'win32':
536 assert self.isolate_dir[0].lower() == rhs.isolate_dir[0].lower()
538 # Takes the difference between the two isolate_dir. Note that while
539 # isolate_dir is in native path case, all other references are in posix.
540 l_rel_cwd, r_rel_cwd = self.isolate_dir, rhs.isolate_dir
541 if self.command or rhs.command:
542 use_rhs = bool(not self.command and rhs.command)
544 # If self doesn't define any file, use rhs.
545 use_rhs = not bool(self.touched or self.tracked or self.untracked)
547 # Rebase files in rhs.
548 l_rel_cwd, r_rel_cwd = r_rel_cwd, l_rel_cwd
550 rebase_path = os.path.relpath(r_rel_cwd, l_rel_cwd).replace(
553 if f.startswith('<(') or rebase_path == '.':
555 return posixpath.join(rebase_path, f)
558 """Rebase items in either lhs or rhs, as needed."""
561 return sorted(l + map(rebase_item, r))
564 KEY_TOUCHED: map_both(self.touched, rhs.touched),
565 KEY_TRACKED: map_both(self.tracked, rhs.tracked),
566 KEY_UNTRACKED: map_both(self.untracked, rhs.untracked),
567 'command': self.command or rhs.command,
568 'read_only': rhs.read_only if self.read_only is None else self.read_only,
570 return ConfigSettings(var, l_rel_cwd)
573 """Converts the object into a dict."""
576 out['command'] = self.command
578 out[KEY_TOUCHED] = self.touched
580 out[KEY_TRACKED] = self.tracked
582 out[KEY_UNTRACKED] = self.untracked
583 if self.read_only is not None:
584 out['read_only'] = self.read_only
585 # TODO(maruel): Probably better to not output it if command is None?
586 if self.isolate_dir is not None:
587 out['isolate_dir'] = self.isolate_dir
591 """Returns a short representation useful for debugging."""
593 '\n ' + f for f in (self.touched + self.tracked + self.untracked))
594 return 'ConfigSettings(%s, %s, %s, %s)' % (
601 def _safe_index(l, k):
608 def _get_map_keys(dest_keys, in_keys):
609 """Returns a tuple of the indexes of each item in in_keys found in dest_keys.
611 For example, if in_keys is ('A', 'C') and dest_keys is ('A', 'B', 'C'), the
612 return value will be (0, None, 1).
614 return tuple(_safe_index(in_keys, k) for k in dest_keys)
617 def _map_keys(mapping, items):
618 """Returns a tuple with items placed at mapping index.
620 For example, if mapping is (1, None, 0) and items is ('a', 'b'), it will
621 return ('b', None, 'c').
623 return tuple(items[i] if i != None else None for i in mapping)
626 class Configs(object):
627 """Represents a processed .isolate file.
629 Stores the file in a processed way, split by configuration.
631 At this point, we don't know all the possibilities. So mount a partial view
634 This class doesn't hold isolate_dir, since it is dependent on the final
635 configuration selected. It is implicitly dependent on which .isolate defines
636 the 'command' that will take effect.
638 def __init__(self, file_comment, config_variables):
639 self.file_comment = file_comment
640 # Contains the names of the config variables seen while processing
641 # .isolate file(s). The order is important since the same order is used for
642 # keys in self._by_config.
643 assert isinstance(config_variables, tuple)
644 assert all(isinstance(c, basestring) for c in config_variables), (
646 config_variables = tuple(config_variables)
647 assert tuple(sorted(config_variables)) == config_variables, config_variables
648 self._config_variables = config_variables
649 # The keys of _by_config are tuples of values for each of the items in
650 # self._config_variables. A None item in the list of the key means the value
655 def config_variables(self):
656 return self._config_variables
658 def get_config(self, config):
659 """Returns all configs that matches this config as a single ConfigSettings.
661 Returns an empty ConfigSettings if none apply.
663 # TODO(maruel): Fix ordering based on the bounded values. The keys are not
664 # necessarily sorted in the way that makes sense, they are alphabetically
665 # sorted. It is important because the left-most takes precedence.
666 out = ConfigSettings({}, None)
667 for k, v in sorted(self._by_config.iteritems()):
668 if all(i == j or j is None for i, j in zip(config, k)):
def set_config(self, key, value):
    """Registers |value| as the ConfigSettings for |key|.

    The key is a tuple of bounded or unbounded variables, with one entry per
    config variable. The global variable is the key where all values are
    unbounded, e.g.:
      (None,) * len(self._config_variables)

    A key can only be set once.
    """
    known_configs = self._by_config
    variable_names = self._config_variables
    assert key not in known_configs, (key, known_configs.keys())
    assert isinstance(key, tuple)
    assert len(key) == len(variable_names), (key, variable_names)
    assert isinstance(value, ConfigSettings)
    known_configs[key] = value
686 def union(self, rhs):
687 """Returns a new Configs instance, the union of variables from self and rhs.
689 Uses self.file_comment if available, otherwise rhs.file_comment.
690 It keeps config_variables sorted in the output.
692 # Merge the keys of config_variables for each Configs instances. All the new
693 # variables will become unbounded. This requires realigning the keys.
694 config_variables = tuple(sorted(
695 set(self.config_variables) | set(rhs.config_variables)))
696 out = Configs(self.file_comment or rhs.file_comment, config_variables)
697 mapping_lhs = _get_map_keys(out.config_variables, self.config_variables)
698 mapping_rhs = _get_map_keys(out.config_variables, rhs.config_variables)
700 (_map_keys(mapping_lhs, k), v) for k, v in self._by_config.iteritems())
701 # pylint: disable=W0212
703 (_map_keys(mapping_rhs, k), v) for k, v in rhs._by_config.iteritems())
705 for key in set(lhs_config) | set(rhs_config):
706 l = lhs_config.get(key)
707 r = rhs_config.get(key)
708 out.set_config(key, l.union(r) if (l and r) else (l or r))
712 """Returns a flat dictionary representation of the configuration.
714 return dict((k, v.flatten()) for k, v in self._by_config.iteritems())
def make_isolate_file(self):
    """Returns a dictionary suitable for writing to a .isolate file.

    The flattened per-config data goes through invert_map() and
    reduce_inputs(), then convert_map_to_isolate_dict() builds the result.
    """
    per_config = self.flatten()
    inverted = invert_map(per_config)
    reduced = reduce_inputs(inverted)
    return convert_map_to_isolate_dict(reduced, self.config_variables)
725 return 'Configs(%s,%s)' % (
726 self._config_variables,
727 ''.join('\n %s' % str(f) for f in self._by_config))
730 def load_isolate_as_config(isolate_dir, value, file_comment):
731 """Parses one .isolate file and returns a Configs() instance.
734 isolate_dir: only used to load relative includes so it doesn't depend on
736 value: is the loaded dictionary that was defined in the gyp file.
737 file_comment: comments found at the top of the file so it can be preserved.
739 The expected format is strict, anything diverting from the format below will
746 ['OS=="vms" and foo=42', {
751 'isolate_dependency_tracked': [
754 'isolate_dependency_untracked': [
767 assert os.path.isabs(isolate_dir), isolate_dir
768 if any(len(cond) == 3 for cond in value.get('conditions', [])):
769 raise isolateserver.ConfigError('Using \'else\' is not supported anymore.')
770 variables_and_values = {}
771 verify_root(value, variables_and_values)
772 if variables_and_values:
773 config_variables, config_values = zip(
774 *sorted(variables_and_values.iteritems()))
775 all_configs = list(itertools.product(*config_values))
777 config_variables = ()
780 isolate = Configs(file_comment, config_variables)
782 # Add global variables. The global variables are on the empty tuple key.
784 (None,) * len(config_variables),
785 ConfigSettings(value.get('variables', {}), isolate_dir))
787 # Add configuration-specific variables.
788 for expr, then in value.get('conditions', []):
789 configs = match_configs(expr, config_variables, all_configs)
790 new = Configs(None, config_variables)
791 for config in configs:
792 new.set_config(config, ConfigSettings(then['variables'], isolate_dir))
793 isolate = isolate.union(new)
795 # Load the includes. Process them in reverse so the last one takes precedence.
796 for include in reversed(value.get('includes', [])):
797 if os.path.isabs(include):
798 raise isolateserver.ConfigError(
799 'Failed to load configuration; absolute include path \'%s\'' %
801 included_isolate = os.path.normpath(os.path.join(isolate_dir, include))
802 if sys.platform == 'win32':
803 if included_isolate[0].lower() != isolate_dir[0].lower():
804 raise isolateserver.ConfigError(
805 'Can\'t reference a .isolate file from another drive')
806 with open(included_isolate, 'r') as f:
807 included_isolate = load_isolate_as_config(
808 os.path.dirname(included_isolate),
809 eval_content(f.read()),
811 isolate = isolate.union(included_isolate)
816 def load_isolate_for_config(isolate_dir, content, config_variables):
817 """Loads the .isolate file and returns the information unprocessed but
818 filtered for the specific OS.
821 tuple of command, dependencies, touched, read_only flag, isolate_dir.
822 The dependencies are fixed to use os.path.sep.
824 # Load the .isolate file, process its conditions, retrieve the command and
826 isolate = load_isolate_as_config(isolate_dir, eval_content(content), None)
829 config_variables[var] for var in isolate.config_variables)
831 raise isolateserver.ConfigError(
832 'These configuration variables were missing from the command line: %s' %
834 sorted(set(isolate.config_variables) - set(config_variables))))
836 # A configuration is to be created with all the combinations of free
838 config = isolate.get_config(config_name)
839 # Merge tracked and untracked variables, isolate.py doesn't care about the
840 # trackability of the variables, only the build tool does.
841 dependencies = sorted(
842 f.replace('/', os.path.sep) for f in config.tracked + config.untracked
844 touched = sorted(f.replace('/', os.path.sep) for f in config.touched)
846 config.command, dependencies, touched, config.read_only,