1 # Copyright 2014 The Swarming Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 that
3 # can be found in the LICENSE file.
5 """Contains logic to parse .isolate files.
7 This module doesn't touch the file system. It's the job of the client code to do
8 I/O on behalf of this module.
10 See more information at
11 https://code.google.com/p/swarming/wiki/IsolateDesign
12 https://code.google.com/p/swarming/wiki/IsolateUserGuide
24 from utils import short_expression_finder
# Variable name for files that should be 0-length when mapped.
KEY_TOUCHED = 'isolate_dependency_touched'
# Variable name for files that should be tracked by the build tool.
KEY_TRACKED = 'isolate_dependency_tracked'
# Variable name for files that should not be tracked by the build tool.
KEY_UNTRACKED = 'isolate_dependency_untracked'

# Regular expression fragment matching a valid variable name. It is embedded
# in the '<(NAME)' patterns built by replace_variable() and eval_variables().
VALID_VARIABLE = '[A-Za-z_][A-Za-z_0-9]*'
def determine_root_dir(relative_root, infiles):
  """For a list of infiles, determines the deepest root directory that is
  referenced indirectly.

  Every leading '..' component of an entry walks one directory up from
  |relative_root|. The result is replaced by that ancestor whenever the current
  result starts with it (plain string prefix test).

  All arguments must be using os.path.sep.

  Arguments:
    relative_root: directory the entries of |infiles| are relative to.
    infiles: list of relative paths.

  Returns:
    |relative_root| or one of its ancestors.
  """
  # The trick used to determine the root directory is to look at "how far" back
  # up it is looking up.
  parent_prefix = '..' + os.path.sep
  deepest_root = relative_root
  for infile in infiles:
    candidate = relative_root
    while infile.startswith(parent_prefix):
      # Drop one '../' and climb one directory in lockstep.
      infile = infile[len(parent_prefix):]
      assert not infile.startswith(os.path.sep)
      candidate = os.path.dirname(candidate)
    if deepest_root.startswith(candidate):
      deepest_root = candidate
  # Let logging do the formatting so it is skipped when debug is disabled.
  logging.debug(
      'determine_root_dir(%s, %d files) -> %s',
      relative_root, len(infiles), deepest_root)
  return deepest_root
def replace_variable(part, variables):
  """Substitutes |part| when it is a '<(NAME)' variable reference.

  Arguments:
    part: string fragment, e.g. one element of the list produced by the
          re.split() call in eval_variables().
    variables: dict mapping variable names to their replacement value.

  Returns:
    variables[NAME] when |part| starts with '<(NAME)', |part| itself otherwise.

  Raises:
    isolateserver.ConfigError: if NAME is not a key of |variables|.
  """
  # Both halves are raw strings so '\(' and '\)' reach the regexp engine
  # without relying on Python passing unknown escapes through.
  m = re.match(r'<\((' + VALID_VARIABLE + r')\)', part)
  if not m:
    # Literal text surrounding the variable references: nothing to replace.
    return part
  name = m.group(1)
  if name not in variables:
    raise isolateserver.ConfigError(
        'Variable "%s" was not found in %s.\nDid you forget to specify '
        '--path-variable?' % (name, variables))
  return variables[name]
def eval_variables(item, variables):
  """Replaces the .isolate variables in a string item.

  Note that the .isolate format is a subset of the .gyp dialect.

  Arguments:
    item: string that may contain '<(NAME)' references.
    variables: dict mapping variable names to their replacement value.

  Returns:
    |item| with every '<(NAME)' reference substituted by replace_variable().
  """
  # The capturing group makes re.split() keep the '<(NAME)' references in the
  # resulting list, interleaved with the literal text around them.
  parts = re.split(r'(<\(' + VALID_VARIABLE + r'\))', item)
  # NOTE(review): the pieces are concatenated back into one string, which
  # assumes the values stored in |variables| are strings -- confirm.
  return ''.join(replace_variable(p, variables) for p in parts)
def split_touched(files):
  """Splits files that are touched vs files that are read.

  NOTE(review): the partition test is not visible in the reviewed source. It is
  assumed that an entry with a falsy |size| attribute is 'touched', in line with
  KEY_TOUCHED being documented as files that are 0-length when mapped --
  confirm against the callers.

  Arguments:
    files: iterable of objects exposing a |size| attribute.

  Returns:
    A (tracked, touched) tuple of lists preserving the input order.
  """
  tracked = []
  touched = []
  for f in files:
    if f.size:
      tracked.append(f)
    else:
      touched.append(f)
  return tracked, touched
def pretty_print(variables, stdout):
  """Outputs a gyp compatible list from the decoded variables.

  Similar to pprint.print() but with NIH syndrome.

  Arguments:
    variables: dict to serialize. Values may be dicts, lists, strings, ints,
               bools or None; anything else trips an assert.
    stdout: file-like object; only its write() method is used.
  """
  # Order the dictionary keys by these keys in priority.
  ORDER = (
      'variables', 'condition', 'command', 'relative_cwd', 'read_only',
      KEY_TRACKED, KEY_UNTRACKED)
  # NOTE(review): whitespace runs are collapsed in the reviewed source, so the
  # exact width of one indentation level could not be read back; two spaces is
  # assumed -- confirm.
  INDENT = '  '

  def sorting_key(x):
    """Gives priority to 'most important' keys before the others."""
    if x in ORDER:
      # The index is stringified so it compares against the other (string)
      # keys; digits sort before letters.
      return str(ORDER.index(x))
    return x

  def quote(s):
    """Returns |s| as a single-quoted literal, escaping backslash and quote."""
    return '\'%s\'' % s.replace('\\', '\\\\').replace('\'', '\\\'')

  def loop_list(indent, items):
    for item in items:
      if isinstance(item, basestring):
        # Escaped like every other string so the output can be parsed back.
        stdout.write('%s%s,\n' % (indent, quote(item)))
      elif isinstance(item, dict):
        stdout.write('%s{\n' % indent)
        loop_dict(indent + INDENT, item)
        stdout.write('%s},\n' % indent)
      elif isinstance(item, list):
        # A list inside a list will write the first item embedded.
        stdout.write('%s[' % indent)
        for index, i in enumerate(item):
          if isinstance(i, basestring):
            stdout.write('%s, ' % quote(i))
          elif isinstance(i, dict):
            stdout.write('{\n')
            loop_dict(indent + INDENT, i)
            # No separator after the last embedded dict.
            x = ', ' if index != len(item) - 1 else ''
            stdout.write('%s}%s' % (indent, x))
          else:
            assert False
        stdout.write('],\n')
      else:
        assert False

  def loop_dict(indent, items):
    for key in sorted(items, key=sorting_key):
      item = items[key]
      stdout.write("%s'%s': " % (indent, key))
      if isinstance(item, dict):
        stdout.write('{\n')
        loop_dict(indent + INDENT, item)
        stdout.write(indent + '},\n')
      elif isinstance(item, list):
        stdout.write('[\n')
        loop_list(indent + INDENT, item)
        stdout.write(indent + '],\n')
      elif isinstance(item, basestring):
        stdout.write('%s,\n' % quote(item))
      elif isinstance(item, (int, bool)) or item is None:
        # The trailing comma is required: 'read_only' sorts before the
        # dependency lists, so another entry may follow in the same dict.
        stdout.write('%s,\n' % item)
      else:
        assert False, item

  stdout.write('{\n')
  loop_dict(INDENT, variables)
  stdout.write('}\n')
def print_all(comment, data, stream):
  """Prints a complete .isolate file and its top-level file comment into a
  stream.

  Arguments:
    comment: file level comment written verbatim before the data; skipped when
             empty or None.
    data: dict serialized with pretty_print().
    stream: file-like object; only its write() method is used.
  """
  if comment:
    stream.write(comment)
  pretty_print(data, stream)
def union(lhs, rhs):
  """Merges two compatible datastructures composed of dict/list/set.

  Arguments:
    lhs: dict, list or any object exposing union(); may be None.
    rhs: object of the same type as |lhs|; may be None. At most one of the two
         arguments may be None.

  Returns:
    A deep copy of the other argument when one is None; lhs.union(rhs) when
    |lhs| has a union() method; a key-wise recursive merge for dicts; the
    concatenation lhs + rhs for lists.
  """
  assert lhs is not None or rhs is not None
  if lhs is None:
    return copy.deepcopy(rhs)
  if rhs is None:
    return copy.deepcopy(lhs)
  assert type(lhs) == type(rhs), (lhs, rhs)
  if hasattr(lhs, 'union'):
    # Includes set, ConfigSettings and Configs.
    return lhs.union(rhs)
  if isinstance(lhs, dict):
    # A key missing on one side yields None, which the recursive call turns
    # into a deep copy of the other side.
    return dict((k, union(lhs.get(k), rhs.get(k))) for k in set(lhs).union(rhs))
  elif isinstance(lhs, list):
    # Do not go inside the list.
    return lhs + rhs
  assert False, type(lhs)
def extract_comment(content):
  """Extracts file level comment.

  Arguments:
    content: text of a file.

  Returns:
    The leading run of lines starting with '#', line endings included. Empty
    string when the first line is not a comment.
  """
  out = []
  # keepends=True so the comment can be written back verbatim.
  for line in content.splitlines(True):
    if not line.startswith('#'):
      break
    out.append(line)
  return ''.join(out)
def eval_content(content):
  """Evaluates a python file and return the value defined in it.

  Used in practice for .isolate files.

  Arguments:
    content: text containing a single python expression.

  Returns:
    The value |content| evaluates to.

  Raises:
    TypeError: re-raised with |content| appended to its args.
  """
  # NOTE(review): eval() runs with '__builtins__' set to None, which narrows
  # what the expression can reach but is not a sandbox; |content| should only
  # come from trusted files.
  globs = {'__builtins__': None}
  locs = {}
  try:
    value = eval(content, globs, locs)
  except TypeError as e:
    # Attach the offending text so the failure can be diagnosed.
    e.args = list(e.args) + [content]
    raise
  # The expression must not have defined anything as a side effect.
  assert locs == {}, locs
  assert globs == {'__builtins__': None}, globs
  return value
def match_configs(expr, config_variables, all_configs):
  """Returns the list of values from |all_configs| that match the condition
  |expr|.

  Arguments:
    expr: string that is evaluatable with eval(). It is a GYP condition.
    config_variables: list of the name of the variables.
    all_configs: list of the list of possible values.

  If a variable is not referenced at all, it is marked as unbounded (free) with
  a value set to None.

  Returns:
    List of tuples aligned with |config_variables| for which |expr| is True.

  Raises:
    isolateserver.ConfigError: if |expr| evaluates to something else than a
        bool.
  """
  # It is more than just eval'ing the variable, it needs to be double checked to
  # see if the variable is referenced at all. If not, the variable is free
  # (unbounded).
  # TODO(maruel): Use the intelligent way by inspecting expr instead of doing
  # trial and error to figure out which variable is bound.
  combinations = []
  for bound_variables in itertools.product(
      (True, False), repeat=len(config_variables)):
    # Add the combination of variables bound.
    # NOTE(review): the container wrapping the stripped tuples is not visible
    # in the reviewed source; a set is assumed so that configurations which
    # become identical once a variable is stripped are evaluated once.
    combinations.append(
        (
          [c for c, b in zip(config_variables, bound_variables) if b],
          set(
            tuple(v if b else None for v, b in zip(line, bound_variables))
            for line in all_configs)
        ))

  out = []
  for variables, configs in combinations:
    # Strip variables and see if expr can still be evaluated.
    for values in configs:
      globs = {'__builtins__': None}
      globs.update(zip(variables, (v for v in values if v is not None)))
      try:
        assertion = eval(expr, globs, {})
      except NameError:
        # NOTE(review): assumed handler; |expr| references a variable that was
        # stripped from this combination, so the combination is not usable.
        continue
      if not isinstance(assertion, bool):
        raise isolateserver.ConfigError('Invalid condition')
      if assertion:
        out.append(values)
  return out
def verify_variables(variables):
  """Verifies the |variables| dictionary is in the expected format.

  Arguments:
    variables: dict mapping a variable name to its value. 'read_only' must be
               0, 1, 2 or None; any other entry must be a list of strings.

  Raises:
    AssertionError: if |variables| does not follow the format above.
  """
  # NOTE(review): this list is not visible in the reviewed source. It holds the
  # five keys ConfigSettings.union() passes back through this function, which
  # is the minimum it must accept -- confirm nothing else is allowed.
  VALID_VARIABLES = [
    'command',
    KEY_TOUCHED,
    KEY_TRACKED,
    KEY_UNTRACKED,
    'read_only',
  ]
  assert isinstance(variables, dict), variables
  assert set(VALID_VARIABLES).issuperset(set(variables)), variables.keys()
  for name, value in variables.iteritems():
    if name == 'read_only':
      assert value in (0, 1, 2, None), value
    else:
      assert isinstance(value, list), value
      assert all(isinstance(i, basestring) for i in value), value
def verify_ast(expr, variables_and_values):
  """Verifies that |expr| is of the form
  expr ::= expr ( "or" | "and" ) expr
         | identifier "==" ( string | int )
  Also collects the variable identifiers and string/int values in the dict
  |variables_and_values|, in the form {'var': set([val1, val2, ...]), ...}.

  Arguments:
    expr: ast node, e.g. the body of an expression compiled with
          ast.PyCF_ONLY_AST.
    variables_and_values: dict updated in place.

  Raises:
    AssertionError: if |expr| does not follow the grammar above.
  """
  assert isinstance(expr, (ast.BoolOp, ast.Compare))
  if isinstance(expr, ast.BoolOp):
    assert isinstance(expr.op, (ast.And, ast.Or))
    for subexpr in expr.values:
      verify_ast(subexpr, variables_and_values)
  else:
    # Single comparison: |identifier == literal|.
    assert isinstance(expr.left.ctx, ast.Load)
    assert len(expr.ops) == 1
    assert isinstance(expr.ops[0], ast.Eq)
    var_values = variables_and_values.setdefault(expr.left.id, set())
    rhs = expr.comparators[0]
    assert isinstance(rhs, (ast.Str, ast.Num))
    var_values.add(rhs.n if isinstance(rhs, ast.Num) else rhs.s)
def verify_condition(condition, variables_and_values):
  """Verifies the |condition| dictionary is in the expected format.
  See verify_ast() for the meaning of |variables_and_values|.

  Arguments:
    condition: two items list [expr, then] where |expr| is the condition
               string and |then| a dict with a single 'variables' key.
    variables_and_values: dict updated in place by verify_ast().

  Raises:
    AssertionError: if the structure is not the expected one.
    isolateserver.ConfigError: if |then| has no 'variables' entry.
  """
  VALID_INSIDE_CONDITION = ['variables']
  assert isinstance(condition, list), condition
  assert len(condition) == 2, condition
  expr, then = condition

  # Parse only; the condition is checked against the grammar, not evaluated.
  test_ast = compile(expr, '<condition>', 'eval', ast.PyCF_ONLY_AST)
  verify_ast(test_ast.body, variables_and_values)

  assert isinstance(then, dict), then
  assert set(VALID_INSIDE_CONDITION).issuperset(set(then)), then.keys()
  if 'variables' not in then:
    raise isolateserver.ConfigError('Missing \'variables\' in condition %s' %
        condition)
  verify_variables(then['variables'])
def verify_root(value, variables_and_values):
  """Checks that |value| is the parsed form of a valid .isolate file.

  The root must be a dict whose only keys are 'includes' (a list of strings)
  and 'conditions' (a list checked item by item with verify_condition()).
  See verify_ast() for the meaning of |variables_and_values|.
  """
  VALID_ROOTS = ['includes', 'conditions']
  assert isinstance(value, dict), value
  assert set(value).issubset(set(VALID_ROOTS)), value.keys()

  included = value.get('includes', [])
  assert isinstance(included, list), included
  for entry in included:
    assert isinstance(entry, basestring), entry

  root_conditions = value.get('conditions', [])
  assert isinstance(root_conditions, list), root_conditions
  for entry in root_conditions:
    verify_condition(entry, variables_and_values)
def remove_weak_dependencies(values, key, item, item_configs):
  """Removes any configs from this key if the item is already under a
  strong key.

  Arguments:
    values: dict {key: {item: set(configs)}}.
    key: the key |item| is currently listed under.
    item: the dependency being processed.
    item_configs: configs that reference |item| under |key|.

  Returns:
    |item_configs| itself when |key| is not KEY_TOUCHED; otherwise a new set
    without the configs where |item| is also KEY_TRACKED or KEY_UNTRACKED.
  """
  if key == KEY_TOUCHED:
    # Work on a copy; the caller's set must not be modified.
    item_configs = set(item_configs)
    for stronger_key in (KEY_TRACKED, KEY_UNTRACKED):
      try:
        item_configs -= values[stronger_key][item]
      except KeyError:
        # |item| is not listed under this stronger key.
        pass

  return item_configs
def remove_repeated_dependencies(folders, key, item, item_configs):
  """Removes any configs from this key if the item is in a folder that is
  already included.

  Arguments:
    folders: dict {folder: set(configs)}, see get_folders().
    key: the key |item| is currently listed under.
    item: the dependency being processed.
    item_configs: configs that reference |item| under |key|.

  Returns:
    |item_configs| itself when |key| is not a dependency key; otherwise a new
    set without the configs of every other folder |item| starts with.
  """
  if key in (KEY_UNTRACKED, KEY_TRACKED, KEY_TOUCHED):
    # Work on a copy; the caller's set must not be modified.
    item_configs = set(item_configs)
    for (folder, configs) in folders.iteritems():
      if folder != item and item.startswith(folder):
        item_configs -= configs

  return item_configs
def get_folders(values_dict):
  """Returns a dict of all the folders in the given value_dict.

  Arguments:
    values_dict: dict {item: configs}.

  Returns:
    New dict restricted to the items whose name ends with '/'.
  """
  return dict(
    (item, values_dict[item]) for item in values_dict
    if item.endswith('/')
  )
def invert_map(variables):
  """Converts {config: {deptype: list(depvals)}} to
  {deptype: {depval: set(configs)}}.

  'command' is handled as a single value (converted to a tuple so it can be
  used as a dict key) and 'read_only' as a single scalar; the dependency keys
  contribute one entry per listed item.
  """
  # NOTE(review): the key tuple is not visible in the reviewed source; it is
  # assumed to be the five keys handled below and in
  # convert_map_to_isolate_dict().
  KEYS = (
    KEY_TOUCHED,
    KEY_TRACKED,
    KEY_UNTRACKED,
    'command',
    'read_only',
  )
  out = dict((key, {}) for key in KEYS)
  for config, values in variables.iteritems():
    for key in KEYS:
      if key == 'command':
        items = [tuple(values[key])] if key in values else []
      elif key == 'read_only':
        items = [values[key]] if key in values else []
      else:
        assert key in (KEY_TOUCHED, KEY_TRACKED, KEY_UNTRACKED)
        items = values.get(key, [])
      for item in items:
        out[key].setdefault(item, set()).add(config)
  return out
def reduce_inputs(values):
  """Reduces the output of invert_map() to the strictest minimum list.

  Looks at each individual file and directory, maps where they are used and
  reconstructs the inverse dictionary.

  Returns the minimized dictionary.
  """
  # NOTE(review): the key tuple is not visible in the reviewed source; it is
  # assumed to match the one used by invert_map().
  KEYS = (
    KEY_TOUCHED,
    KEY_TRACKED,
    KEY_UNTRACKED,
    'command',
    'read_only',
  )

  # Folders can only live in KEY_UNTRACKED.
  folders = get_folders(values.get(KEY_UNTRACKED, {}))

  out = dict((key, {}) for key in KEYS)
  for key in KEYS:
    for item, item_configs in values.get(key, {}).iteritems():
      item_configs = remove_weak_dependencies(values, key, item, item_configs)
      item_configs = remove_repeated_dependencies(
          folders, key, item, item_configs)
      # An item left without any config is redundant and is dropped.
      if item_configs:
        out[key][item] = item_configs
  return out
def convert_map_to_isolate_dict(values, config_variables):
  """Regenerates back a .isolate configuration dict from files and dirs
  mappings generated from reduce_inputs().

  Arguments:
    values: dict {key: {item: set(configs)}}.
    config_variables: names of the variables each config tuple is aligned with.

  Returns:
    {'conditions': [[expr, {'variables': {...}}], ...]}.

  Raises:
    isolateserver.ConfigError: on an unexpected key, entry type, empty entry or
        duplicate 'command' for the same set of configs.
  """
  # Gather a list of configurations for set inversion later.
  all_mentioned_configs = set()
  for configs_by_item in values.itervalues():
    for configs in configs_by_item.itervalues():
      all_mentioned_configs.update(configs)

  # Invert the mapping to make it dict first.
  conditions = {}
  for key in values:
    for item, configs in values[key].iteritems():
      # Items sharing the exact same set of configs end up in one condition.
      then = conditions.setdefault(frozenset(configs), {})
      variables = then.setdefault('variables', {})

      if key == 'read_only':
        if not isinstance(item, int):
          raise isolateserver.ConfigError(
              'Unexpected entry type %r for key %s' % (item, key))
        variables[key] = item
      elif key == 'command':
        if not isinstance(item, tuple):
          raise isolateserver.ConfigError(
              'Unexpected entry type %r for key %s' % (item, key))
        if key in variables:
          raise isolateserver.ConfigError('Unexpected duplicate key %s' % key)
        if not item:
          raise isolateserver.ConfigError(
              'Expected non empty entry in %s' % key)
        variables[key] = list(item)
      elif key in (KEY_TOUCHED, KEY_TRACKED, KEY_UNTRACKED):
        if not isinstance(item, basestring):
          raise isolateserver.ConfigError('Unexpected entry type %r' % item)
        if not item:
          raise isolateserver.ConfigError(
              'Expected non empty entry in %s' % key)
        # The list of items (files or dirs). Append the new item and keep
        # the list sorted.
        l = variables.setdefault(key, [])
        l.append(item)
        l.sort()
      else:
        raise isolateserver.ConfigError('Unexpected key %s' % key)

  if all_mentioned_configs:
    # Change [(1, 2), (3, 4)] to [set(1, 3), set(2, 4)]
    config_values = map(set, zip(*all_mentioned_configs))
    for i in config_values:
      # None marks an unbounded variable, it is not an actual value.
      i.discard(None)
    sef = short_expression_finder.ShortExpressionFinder(
        zip(config_variables, config_values))
    conditions = sorted([sef.get_expr(c), v] for c, v in conditions.iteritems())
  else:
    conditions = []
  return {'conditions': conditions}
class ConfigSettings(object):
  """Represents the dependency variables for a single build configuration.
  The structure is immutable.
  """
  def __init__(self, values):
    """Copies the entries of |values| after checking it with
    verify_variables().
    """
    verify_variables(values)
    self.touched = sorted(values.get(KEY_TOUCHED, []))
    self.tracked = sorted(values.get(KEY_TRACKED, []))
    self.untracked = sorted(values.get(KEY_UNTRACKED, []))
    # Sliced so the instance does not share the caller's list.
    self.command = values.get('command', [])[:]
    self.read_only = values.get('read_only')

  def union(self, rhs):
    """Merges two config settings together.

    self has priority over rhs for 'command' variable. The same applies to
    'read_only' unless it is None in self.

    Returns a new ConfigSettings; neither operand is modified.
    """
    var = {
      KEY_TOUCHED: sorted(self.touched + rhs.touched),
      KEY_TRACKED: sorted(self.tracked + rhs.tracked),
      KEY_UNTRACKED: sorted(self.untracked + rhs.untracked),
      'command': self.command or rhs.command,
      'read_only': rhs.read_only if self.read_only is None else self.read_only,
    }
    return ConfigSettings(var)

  def flatten(self):
    """Returns the settings as a dict, leaving out the entries that are empty
    and 'read_only' when it is None.
    """
    out = {}
    if self.command:
      out['command'] = self.command
    if self.touched:
      out[KEY_TOUCHED] = self.touched
    if self.tracked:
      out[KEY_TRACKED] = self.tracked
    if self.untracked:
      out[KEY_UNTRACKED] = self.untracked
    if self.read_only is not None:
      out['read_only'] = self.read_only
    return out
536 def _safe_index(l, k):
def _get_map_keys(dest_keys, in_keys):
  """Computes, for each key of dest_keys, its position in in_keys.

  For example, if in_keys is ('A', 'C') and dest_keys is ('A', 'B', 'C'), the
  return value will be (0, None, 1).
  """
  positions = [_safe_index(in_keys, key) for key in dest_keys]
  return tuple(positions)
552 def _map_keys(mapping, items):
553 """Returns a tuple with items placed at mapping index.
555 For example, if mapping is (1, None, 0) and items is ('a', 'b'), it will
556 return ('b', None, 'c').
558 return tuple(items[i] if i != None else None for i in mapping)
class Configs(object):
  """Represents a processed .isolate file.

  Stores the file in a processed way, split by configuration.

  At this point, we don't know all the possibilities. So mount a partial view
  that we have.
  """
  def __init__(self, file_comment, config_variables):
    self.file_comment = file_comment
    # Contains the names of the config variables seen while processing
    # .isolate file(s). The order is important since the same order is used for
    # keys in self._by_config.
    assert isinstance(config_variables, tuple)
    self._config_variables = config_variables
    # The keys of _by_config are tuples of values for each of the items in
    # self._config_variables. A None item in the list of the key means the value
    # is unbounded.
    self._by_config = {}

  @property
  def config_variables(self):
    """Tuple of the variable names the keys of _by_config are aligned with."""
    return self._config_variables

  def get_config(self, config):
    """Returns all configs that matches this config as a single ConfigSettings.

    A stored key matches when each of its items is either equal to the item of
    |config| at the same position or None (unbounded).

    Returns None if no matching configuration is found.
    """
    out = None
    for k, v in self._by_config.iteritems():
      if all(i == j or j is None for i, j in zip(config, k)):
        out = out.union(v) if out else v
    return out

  def union(self, rhs):
    """Adds variables from rhs (a Configs) to the existing variables.

    Returns a new Configs; neither operand is modified.
    """
    # Takes the first file comment, prefering lhs.

    # Default mapping of configs.
    lhs_config = self._by_config
    # pylint: disable=W0212
    rhs_config = rhs._by_config
    comment = self.file_comment or rhs.file_comment
    if not self.config_variables:
      assert not self._by_config
      out = Configs(comment, rhs.config_variables)
    elif not rhs.config_variables:
      assert not rhs._by_config
      out = Configs(comment, self.config_variables)
    elif rhs.config_variables == self.config_variables:
      out = Configs(comment, self.config_variables)
    else:
      # At that point, we need to merge the keys. By default, all the new
      # variables will become unbounded. This requires realigning the keys.
      config_variables = tuple(sorted(
          set(self.config_variables) | set(rhs.config_variables)))
      out = Configs(comment, config_variables)

      mapping_lhs = _get_map_keys(out.config_variables, self.config_variables)
      mapping_rhs = _get_map_keys(out.config_variables, rhs.config_variables)
      lhs_config = dict(
          (_map_keys(mapping_lhs, k), v)
          for k, v in self._by_config.iteritems())
      rhs_config = dict(
          (_map_keys(mapping_rhs, k), v)
          for k, v in rhs._by_config.iteritems())

    for key in set(lhs_config) | set(rhs_config):
      # Module level union(); it copies the value when only one side has it.
      out._by_config[key] = union(lhs_config.get(key), rhs_config.get(key))
    return out

  def flatten(self):
    """Returns a flat dictionary representation of the configuration.
    """
    return dict((k, v.flatten()) for k, v in self._by_config.iteritems())

  def make_isolate_file(self):
    """Returns a dictionary suitable for writing to a .isolate file.
    """
    dependencies_by_config = self.flatten()
    configs_by_dependency = reduce_inputs(invert_map(dependencies_by_config))
    return convert_map_to_isolate_dict(configs_by_dependency,
                                       self.config_variables)
def convert_old_to_new_format(value):
  """Converts from the old .isolate format, which only has one variable (OS),
  always includes 'linux', 'mac' and 'win' in the set of valid values for OS,
  and allows conditions that depend on the set of all OSes, to the new format,
  which allows any set of variables, has no hardcoded values, and only allows
  explicit positive tests of variable values.

  TODO(maruel): Formalize support for variables with a config with no variable
  bound. This is sensible to keep them at the global level and not in a
  condition.

  Arguments:
    value: dict loaded from a .isolate file. It is not modified.

  Returns:
    |value| itself when it is already in the new format, a converted shallow
    copy otherwise.

  Raises:
    isolateserver.ConfigError: if a condition is not of the form OS=="name".
  """
  conditions = value.get('conditions', [])
  if 'variables' not in value and all(len(cond) == 2 for cond in conditions):
    return value  # Nothing to change

  def parse_condition(cond):
    m = re.match(r'OS=="(\w+)"\Z', cond[0])
    if not m:
      raise isolateserver.ConfigError('Invalid condition: %s' % cond[0])
    return m.group(1)

  oses = set(map(parse_condition, conditions))
  default_oses = set(['linux', 'mac', 'win'])
  oses = sorted(oses | default_oses)

  def if_not_os(not_os, then):
    # An 'else' clause becomes a positive test of every other known OS.
    expr = ' or '.join('OS=="%s"' % name for name in oses if name != not_os)
    return [expr, then]

  conditions = [
    cond[:2] for cond in conditions if cond[1]
  ] + [
    if_not_os(parse_condition(cond), cond[2])
    for cond in conditions if len(cond) == 3
  ]

  # Copy before removing 'variables' so the caller's dict is left untouched.
  value = value.copy()
  if 'variables' in value:
    conditions.append(if_not_os(None, {'variables': value.pop('variables')}))
  conditions.sort()

  value['conditions'] = conditions
  return value
def load_isolate_as_config(isolate_dir, value, file_comment):
  """Parses one .isolate file and returns a Configs() instance.

  Arguments:
    isolate_dir: only used to load relative includes so it doesn't depend on
                 cwd.
    value: is the loaded dictionary that was defined in the gyp file.
    file_comment: comments found at the top of the file so it can be preserved.

  The expected format is strict, anything diverting from the format below will
  throw an assert:
  {
    'includes': [
      'foo.isolate',
    ],
    'conditions': [
      ['OS=="vms" and foo==42', {
        'variables': {
          'command': [
            ...
          ],
          'isolate_dependency_tracked': [
            ...
          ],
          'isolate_dependency_untracked': [
            ...
          ],
          'read_only': 0,
        },
      }],
      ...
    ],
  }

  Raises:
    isolateserver.ConfigError: if an include path is absolute.
  """
  value = convert_old_to_new_format(value)

  variables_and_values = {}
  verify_root(value, variables_and_values)
  if variables_and_values:
    config_variables, config_values = zip(
        *sorted(variables_and_values.iteritems()))
    all_configs = list(itertools.product(*config_values))
  else:
    config_variables = ()
    all_configs = []

  isolate = Configs(file_comment, config_variables)

  # Add configuration-specific variables.
  for expr, then in value.get('conditions', []):
    configs = match_configs(expr, config_variables, all_configs)
    new = Configs(None, config_variables)
    for config in configs:
      # pylint: disable=W0212
      new._by_config[config] = ConfigSettings(then['variables'])
    isolate = isolate.union(new)

  # Load the includes. Process them in reverse so the last one take precedence.
  for include in reversed(value.get('includes', [])):
    if os.path.isabs(include):
      raise isolateserver.ConfigError(
          'Failed to load configuration; absolute include path \'%s\'' %
          include)
    included_isolate = os.path.normpath(os.path.join(isolate_dir, include))
    with open(included_isolate, 'r') as f:
      # The included file is resolved relative to its own directory and its
      # file comment is not kept.
      included_isolate = load_isolate_as_config(
          os.path.dirname(included_isolate),
          eval_content(f.read()),
          None)
    isolate = union(isolate, included_isolate)

  return isolate
def load_isolate_for_config(isolate_dir, content, config_variables):
  """Loads the .isolate file and returns the information unprocessed but
  filtered for the specific configuration.

  Arguments:
    isolate_dir: directory used to resolve relative includes.
    content: text of the .isolate file.
    config_variables: dict giving a value to each configuration variable.

  Returns:
    A (command, dependencies, touched, read_only) tuple. The dependencies and
    touched paths are fixed to use os.path.sep. ([], [], [], None) is returned
    when nothing matches the configuration.

  Raises:
    isolateserver.ConfigError: if a variable used by the .isolate file is
        missing from |config_variables|.
  """
  # Load the .isolate file, process its conditions, retrieve the command and
  # dependencies.
  isolate = load_isolate_as_config(isolate_dir, eval_content(content), None)
  try:
    config_name = tuple(
        config_variables[var] for var in isolate.config_variables)
  except KeyError:
    raise isolateserver.ConfigError(
        'These configuration variables were missing from the command line: %s' %
        ', '.join(
            sorted(set(isolate.config_variables) - set(config_variables))))

  # A configuration is to be created with all the combinations of free
  # variables.
  config = isolate.get_config(config_name)
  if not config:
    logging.debug('Loaded an empty .isolate file from %s', isolate_dir)
    return [], [], [], None

  # Merge tracked and untracked variables, isolate.py doesn't care about the
  # trackability of the variables, only the build tool does.
  dependencies = [
    f.replace('/', os.path.sep) for f in config.tracked + config.untracked
  ]
  touched = [f.replace('/', os.path.sep) for f in config.touched]
  return config.command, dependencies, touched, config.read_only