1 # Copyright 2014 The Swarming Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 that
3 # can be found in the LICENSE file.
5 """Contains logic to parse .isolate files.
7 This module doesn't touch the file system. It's the job of the client code to do
8 I/O on behalf of this module.
10 See more information at
11 https://code.google.com/p/swarming/wiki/IsolateDesign
12 https://code.google.com/p/swarming/wiki/IsolateUserGuide
24 # Files that should be 0-length when mapped.
25 KEY_TOUCHED = 'isolate_dependency_touched'
26 # Files that should be tracked by the build tool.
27 KEY_TRACKED = 'isolate_dependency_tracked'
28 # Files that should not be tracked by the build tool.
29 KEY_UNTRACKED = 'isolate_dependency_untracked'
31 # Valid variable name.
32 VALID_VARIABLE = '[A-Za-z_][A-Za-z_0-9]*'
class IsolateError(ValueError):
  """Raised when a .isolate file cannot be loaded.

  It derives from ValueError, so code that already handles ValueError also
  handles this failure.
  """
40 def determine_root_dir(relative_root, infiles):
41 """For a list of infiles, determines the deepest root directory that is
42 referenced indirectly.
44 All arguments must be using os.path.sep.
46 # The trick used to determine the root directory is to look at "how far" back
47 # up it is looking up.
48 deepest_root = relative_root
51 while i.startswith('..' + os.path.sep):
53 assert not i.startswith(os.path.sep)
54 x = os.path.dirname(x)
55 if deepest_root.startswith(x):
58 'determine_root_dir(%s, %d files) -> %s',
59 relative_root, len(infiles), deepest_root)
def replace_variable(part, variables):
  """Expands |part| when it starts with a '<(NAME)' variable reference.

  Arguments:
    part: fragment of a .isolate string; either a '<(NAME)' reference or
        literal text.
    variables: dict mapping variable names to their values.

  Returns:
    str(variables[NAME]) when |part| starts with a '<(NAME)' reference,
    otherwise |part| unchanged.

  Raises:
    IsolateError: the referenced variable is not defined in |variables|.
  """
  # Both pattern pieces are raw strings so that '\(' and '\)' reach the regexp
  # engine verbatim instead of relying on unrecognized string escapes.
  m = re.match(r'<\((' + VALID_VARIABLE + r')\)', part)
  if not m:
    # Literal text is passed through untouched.
    return part
  name = m.group(1)
  if name not in variables:
    raise IsolateError(
        'Variable "%s" was not found in %s.\nDid you forget to specify '
        '--path-variable?' % (name, variables))
  return str(variables[name])
def eval_variables(item, variables):
  """Replaces the .isolate variables in a string item.

  Note that the .isolate format is a subset of the .gyp dialect.

  Arguments:
    item: string that may embed any number of '<(NAME)' references.
    variables: dict mapping variable names to their values.

  Returns:
    |item| with every fragment processed by replace_variable() and the results
    concatenated back together.
  """
  # The capturing group makes re.split() keep the '<(NAME)' delimiters in its
  # result, so each reference is handed to replace_variable() on its own.
  # The trailing piece is a raw string so '\)' is not an invalid str escape.
  fragments = re.split(r'(<\(' + VALID_VARIABLE + r'\))', item)
  return ''.join(replace_variable(p, variables) for p in fragments)
84 def pretty_print(variables, stdout):
85 """Outputs a .isolate file from the decoded variables.
87 The .isolate format is GYP compatible.
89 Similar to pprint.print() but with NIH syndrome.
91 # Order the dictionary keys by these keys in priority.
93 'variables', 'condition', 'command', 'files', 'read_only',
94 KEY_TRACKED, KEY_UNTRACKED)
97 """Gives priority to 'most important' keys before the others."""
99 return str(ORDER.index(x))
102 def loop_list(indent, items):
104 if isinstance(item, basestring):
105 stdout.write('%s\'%s\',\n' % (indent, item))
106 elif isinstance(item, dict):
107 stdout.write('%s{\n' % indent)
108 loop_dict(indent + ' ', item)
109 stdout.write('%s},\n' % indent)
110 elif isinstance(item, list):
111 # A list inside a list will write the first item embedded.
112 stdout.write('%s[' % indent)
113 for index, i in enumerate(item):
114 if isinstance(i, basestring):
116 '\'%s\', ' % i.replace('\\', '\\\\').replace('\'', '\\\''))
117 elif isinstance(i, dict):
119 loop_dict(indent + ' ', i)
120 if index != len(item) - 1:
124 stdout.write('%s}%s' % (indent, x))
131 def loop_dict(indent, items):
132 for key in sorted(items, key=sorting_key):
134 stdout.write("%s'%s': " % (indent, key))
135 if isinstance(item, dict):
137 loop_dict(indent + ' ', item)
138 stdout.write(indent + '},\n')
139 elif isinstance(item, list):
141 loop_list(indent + ' ', item)
142 stdout.write(indent + '],\n')
143 elif isinstance(item, basestring):
145 '\'%s\',\n' % item.replace('\\', '\\\\').replace('\'', '\\\''))
146 elif isinstance(item, (int, bool)) or item is None:
147 stdout.write('%s,\n' % item)
152 loop_dict(' ', variables)
156 def print_all(comment, data, stream):
157 """Prints a complete .isolate file and its top-level file comment into a
161 stream.write(comment)
162 pretty_print(data, stream)
165 def extract_comment(content):
166 """Extracts file level comment."""
168 for line in content.splitlines(True):
169 if line.startswith('#'):
176 def eval_content(content):
177 """Evaluates a python file and return the value defined in it.
179 Used in practice for .isolate files.
181 globs = {'__builtins__': None}
184 value = eval(content, globs, locs)
185 except TypeError as e:
186 e.args = list(e.args) + [content]
188 assert locs == {}, locs
189 assert globs == {'__builtins__': None}, globs
193 def match_configs(expr, config_variables, all_configs):
194 """Returns the list of values from |values| that match the condition |expr|.
197 expr: string that is evaluatable with eval(). It is a GYP condition.
198 config_variables: list of the name of the variables.
199 all_configs: list of the list of possible values.
201 If a variable is not referenced at all, it is marked as unbounded (free) with
204 # It is more than just eval'ing the variable, it needs to be double checked to
205 # see if the variable is referenced at all. If not, the variable is free
207 # TODO(maruel): Use the intelligent way by inspecting expr instead of doing
208 # trial and error to figure out which variable is bound.
210 for bound_variables in itertools.product(
211 (True, False), repeat=len(config_variables)):
212 # Add the combination of variables bound.
215 [c for c, b in zip(config_variables, bound_variables) if b],
217 tuple(v if b else None for v, b in zip(line, bound_variables))
218 for line in all_configs)
222 for variables, configs in combinations:
223 # Strip variables and see if expr can still be evaluated.
224 for values in configs:
225 globs = {'__builtins__': None}
226 globs.update(zip(variables, (v for v in values if v is not None)))
228 assertion = eval(expr, globs, {})
231 if not isinstance(assertion, bool):
232 raise IsolateError('Invalid condition')
238 def verify_variables(variables):
239 """Verifies the |variables| dictionary is in the expected format."""
248 assert isinstance(variables, dict), variables
249 assert set(VALID_VARIABLES).issuperset(set(variables)), variables.keys()
250 for name, value in variables.iteritems():
251 if name == 'read_only':
252 assert value in (0, 1, 2, None), value
254 assert isinstance(value, list), value
255 assert all(isinstance(i, basestring) for i in value), value
def verify_ast(expr, variables_and_values):
  """Verifies that |expr| is of the form
  expr ::= expr ( "or" | "and" ) expr
         | identifier "==" ( string | int )
  Also collects the variable identifiers and string/int values in the dict
  |variables_and_values|, in the form {'var': set([val1, val2, ...]), ...}.

  Arguments:
    expr: ast node of the condition, e.g. the .body of an expression compiled
        with ast.PyCF_ONLY_AST.
    variables_and_values: dict that is updated in place.

  Raises:
    AssertionError: |expr| does not follow the grammar above.
  """
  assert isinstance(expr, (ast.BoolOp, ast.Compare))
  if isinstance(expr, ast.BoolOp):
    assert isinstance(expr.op, (ast.And, ast.Or))
    for subexpr in expr.values:
      verify_ast(subexpr, variables_and_values)
    return

  # Check the node type first so a non-identifier left operand is rejected with
  # an AssertionError like every other malformed condition, instead of an
  # AttributeError on .ctx or .id.
  assert isinstance(expr.left, ast.Name)
  assert isinstance(expr.left.ctx, ast.Load)
  assert len(expr.ops) == 1
  assert isinstance(expr.ops[0], ast.Eq)
  var_values = variables_and_values.setdefault(expr.left.id, set())
  rhs = expr.comparators[0]
  if sys.version_info >= (3, 8):
    # Since Python 3.8 the parser emits ast.Constant for every literal and the
    # ast.Str / ast.Num aliases are deprecated, so they are not touched here.
    assert isinstance(rhs, ast.Constant)
    value = rhs.value
    # bool is an int subclass but was never accepted as a numeric literal.
    assert not isinstance(value, bool)
    assert isinstance(value, (str, int, float, complex))
  else:
    assert isinstance(rhs, (ast.Str, ast.Num))
    value = rhs.n if isinstance(rhs, ast.Num) else rhs.s
  var_values.add(value)
def verify_condition(condition, variables_and_values):
  """Checks that |condition| is a well-formed [expression, {'variables': ...}]
  pair.

  See verify_ast() for the meaning of |variables_and_values|.
  """
  assert isinstance(condition, list), condition
  assert len(condition) == 2, condition
  expr, body = condition

  # The expression is only parsed into an AST here, it is not evaluated.
  tree = compile(expr, '<condition>', 'eval', ast.PyCF_ONLY_AST)
  verify_ast(tree.body, variables_and_values)

  assert isinstance(body, dict), body
  allowed_keys = set(['variables'])
  assert allowed_keys.issuperset(set(body)), body.keys()
  if 'variables' not in body:
    raise IsolateError('Missing \'variables\' in condition %s' % condition)
  verify_variables(body['variables'])
def verify_root(value, variables_and_values):
  """Checks that |value| is the parsed form of a valid .isolate file.

  Only the 'includes', 'conditions' and 'variables' top-level keys are
  accepted; each one is optional.

  See verify_ast() for the meaning of |variables_and_values|.
  """
  assert isinstance(value, dict), value
  known_roots = set(['includes', 'conditions', 'variables'])
  assert known_roots.issuperset(set(value)), value.keys()

  # 'includes' must be a list of strings.
  includes = value.get('includes', [])
  assert isinstance(includes, list), includes
  for include in includes:
    assert isinstance(include, basestring), include

  # Every condition is validated on its own.
  conditions = value.get('conditions', [])
  assert isinstance(conditions, list), conditions
  for condition in conditions:
    verify_condition(condition, variables_and_values)

  verify_variables(value.get('variables', {}))
def get_folders(values_dict):
  """Returns the entries of |values_dict| whose key denotes a folder.

  A key denotes a folder when it ends with '/'. The result is a new dict.
  """
  folders = {}
  for path, configs in values_dict.iteritems():
    if path.endswith('/'):
      folders[path] = configs
  return folders
330 class ConfigSettings(object):
331 """Represents the dependency variables for a single build configuration.
333 The structure is immutable.
335 .command and .isolate_dir describe how to run the command. .isolate_dir uses
336 the OS' native path separator. It must be an absolute path, it's the path
337 where to start the command from.
338 .files is the list of dependencies. The items use '/' as a path separator.
339 .read_only describe how to map the files.
341 def __init__(self, values, isolate_dir):
342 verify_variables(values)
343 if isolate_dir is None:
344 # It must be an empty object if isolate_dir is None.
345 assert values == {}, values
347 # Otherwise, the path must be absolute.
348 assert os.path.isabs(isolate_dir), isolate_dir
351 values.get('files', []) +
352 values.get(KEY_TOUCHED, []) +
353 values.get(KEY_TRACKED, []) +
354 values.get(KEY_UNTRACKED, []))
355 self.command = values.get('command', [])[:]
356 self.isolate_dir = isolate_dir
357 self.read_only = values.get('read_only')
359 def union(self, rhs):
360 """Merges two config settings together into a new instance.
362 A new instance is not created and self or rhs is returned if the other
363 object is the empty object.
365 self has priority over rhs for .command. Use the same .isolate_dir as the
366 one having a .command.
368 Dependencies listed in rhs are patch adjusted ONLY if they don't start with
369 a path variable, e.g. the characters '<('.
371 # When an object has .isolate_dir == None, it means it is the empty object.
372 if rhs.isolate_dir is None:
374 if self.isolate_dir is None:
377 if sys.platform == 'win32':
378 assert self.isolate_dir[0].lower() == rhs.isolate_dir[0].lower()
380 # Takes the difference between the two isolate_dir. Note that while
381 # isolate_dir is in native path case, all other references are in posix.
382 l_rel_cwd, r_rel_cwd = self.isolate_dir, rhs.isolate_dir
383 if self.command or rhs.command:
384 use_rhs = bool(not self.command and rhs.command)
386 # If self doesn't define any file, use rhs.
387 use_rhs = not bool(self.files)
389 # Rebase files in rhs.
390 l_rel_cwd, r_rel_cwd = r_rel_cwd, l_rel_cwd
392 rebase_path = os.path.relpath(r_rel_cwd, l_rel_cwd).replace(
395 if f.startswith('<(') or rebase_path == '.':
397 return posixpath.join(rebase_path, f)
400 """Rebase items in either lhs or rhs, as needed."""
403 return sorted(l + map(rebase_item, r))
406 'command': self.command or rhs.command,
407 'files': map_both(self.files, rhs.files),
408 'read_only': rhs.read_only if self.read_only is None else self.read_only,
410 return ConfigSettings(var, l_rel_cwd)
413 """Converts the object into a dict."""
416 out['command'] = self.command
418 out['files'] = self.files
419 if self.read_only is not None:
420 out['read_only'] = self.read_only
421 # TODO(maruel): Probably better to not output it if command is None?
422 if self.isolate_dir is not None:
423 out['isolate_dir'] = self.isolate_dir
427 """Returns a short representation useful for debugging."""
428 files = ''.join('\n ' + f for f in self.files)
429 return 'ConfigSettings(%s, %s, %s, %s)' % (
436 def _safe_index(l, k):
def _get_map_keys(dest_keys, in_keys):
  """Returns a tuple with one _safe_index(in_keys, key) result per key of
  dest_keys, in the order of dest_keys.

  For example, if in_keys is ('A', 'C') and dest_keys is ('A', 'B', 'C'), the
  return value will be (0, None, 1).
  """
  indexes = []
  for key in dest_keys:
    indexes.append(_safe_index(in_keys, key))
  return tuple(indexes)
452 def _map_keys(mapping, items):
453 """Returns a tuple with items placed at mapping index.
455 For example, if mapping is (1, None, 0) and items is ('a', 'b'), it will
456 return ('b', None, 'c').
458 return tuple(items[i] if i != None else None for i in mapping)
461 class Configs(object):
462 """Represents a processed .isolate file.
464 Stores the file in a processed way, split by configuration.
466 At this point, we don't know all the possibilities. So mount a partial view
469 This class doesn't hold isolate_dir, since it is dependent on the final
470 configuration selected. It is implicitly dependent on which .isolate defines
471 the 'command' that will take effect.
473 def __init__(self, file_comment, config_variables):
474 self.file_comment = file_comment
475 # Contains the names of the config variables seen while processing
476 # .isolate file(s). The order is important since the same order is used for
477 # keys in self._by_config.
478 assert isinstance(config_variables, tuple)
479 assert all(isinstance(c, basestring) for c in config_variables), (
481 config_variables = tuple(config_variables)
482 assert tuple(sorted(config_variables)) == config_variables, config_variables
483 self._config_variables = config_variables
484 # The keys of _by_config are tuples of values for each of the items in
485 # self._config_variables. A None item in the list of the key means the value
490 def config_variables(self):
491 return self._config_variables
493 def get_config(self, config):
494 """Returns all configs that matches this config as a single ConfigSettings.
496 Returns an empty ConfigSettings if none apply.
498 # TODO(maruel): Fix ordering based on the bounded values. The keys are not
499 # necessarily sorted in the way that makes sense, they are alphabetically
    # sorted. It is important because the left-most takes precedence.
501 out = ConfigSettings({}, None)
502 for k, v in sorted(self._by_config.iteritems()):
503 if all(i == j or j is None for i, j in zip(config, k)):
507 def set_config(self, key, value):
508 """Sets the ConfigSettings for this key.
510 The key is a tuple of bounded or unbounded variables. The global variable
511 is the key where all values are unbounded, e.g.:
512 (None,) * len(self._config_variables)
514 assert key not in self._by_config, (key, self._by_config.keys())
515 assert isinstance(key, tuple)
516 assert len(key) == len(self._config_variables), (
517 key, self._config_variables)
518 assert isinstance(value, ConfigSettings)
519 self._by_config[key] = value
521 def union(self, rhs):
522 """Returns a new Configs instance, the union of variables from self and rhs.
524 Uses self.file_comment if available, otherwise rhs.file_comment.
525 It keeps config_variables sorted in the output.
527 # Merge the keys of config_variables for each Configs instances. All the new
528 # variables will become unbounded. This requires realigning the keys.
529 config_variables = tuple(sorted(
530 set(self.config_variables) | set(rhs.config_variables)))
531 out = Configs(self.file_comment or rhs.file_comment, config_variables)
532 mapping_lhs = _get_map_keys(out.config_variables, self.config_variables)
533 mapping_rhs = _get_map_keys(out.config_variables, rhs.config_variables)
535 (_map_keys(mapping_lhs, k), v) for k, v in self._by_config.iteritems())
536 # pylint: disable=W0212
538 (_map_keys(mapping_rhs, k), v) for k, v in rhs._by_config.iteritems())
540 for key in set(lhs_config) | set(rhs_config):
541 l = lhs_config.get(key)
542 r = rhs_config.get(key)
543 out.set_config(key, l.union(r) if (l and r) else (l or r))
547 """Returns a flat dictionary representation of the configuration.
549 return dict((k, v.flatten()) for k, v in self._by_config.iteritems())
552 return 'Configs(%s,%s)' % (
553 self._config_variables,
554 ''.join('\n %s' % str(f) for f in self._by_config))
557 def load_isolate_as_config(isolate_dir, value, file_comment):
558 """Parses one .isolate file and returns a Configs() instance.
561 isolate_dir: only used to load relative includes so it doesn't depend on
563 value: is the loaded dictionary that was defined in the gyp file.
564 file_comment: comments found at the top of the file so it can be preserved.
566 The expected format is strict, anything diverting from the format below will
573 ['OS=="vms" and foo=42', {
591 assert os.path.isabs(isolate_dir), isolate_dir
592 if any(len(cond) == 3 for cond in value.get('conditions', [])):
593 raise IsolateError('Using \'else\' is not supported anymore.')
594 variables_and_values = {}
595 verify_root(value, variables_and_values)
596 if variables_and_values:
597 config_variables, config_values = zip(
598 *sorted(variables_and_values.iteritems()))
599 all_configs = list(itertools.product(*config_values))
601 config_variables = ()
604 isolate = Configs(file_comment, config_variables)
606 # Add global variables. The global variables are on the empty tuple key.
608 (None,) * len(config_variables),
609 ConfigSettings(value.get('variables', {}), isolate_dir))
611 # Add configuration-specific variables.
612 for expr, then in value.get('conditions', []):
613 configs = match_configs(expr, config_variables, all_configs)
614 new = Configs(None, config_variables)
615 for config in configs:
616 new.set_config(config, ConfigSettings(then['variables'], isolate_dir))
617 isolate = isolate.union(new)
  # Load the includes. Process them in reverse so the last one takes precedence.
620 for include in reversed(value.get('includes', [])):
621 if os.path.isabs(include):
623 'Failed to load configuration; absolute include path \'%s\'' %
625 included_isolate = os.path.normpath(os.path.join(isolate_dir, include))
626 if sys.platform == 'win32':
627 if included_isolate[0].lower() != isolate_dir[0].lower():
629 'Can\'t reference a .isolate file from another drive')
630 with open(included_isolate, 'r') as f:
631 included_isolate = load_isolate_as_config(
632 os.path.dirname(included_isolate),
633 eval_content(f.read()),
635 isolate = isolate.union(included_isolate)
640 def load_isolate_for_config(isolate_dir, content, config_variables):
641 """Loads the .isolate file and returns the information unprocessed but
642 filtered for the specific OS.
645 tuple of command, dependencies, read_only flag, isolate_dir.
646 The dependencies are fixed to use os.path.sep.
648 # Load the .isolate file, process its conditions, retrieve the command and
650 isolate = load_isolate_as_config(isolate_dir, eval_content(content), None)
653 config_variables[var] for var in isolate.config_variables)
656 'These configuration variables were missing from the command line: %s' %
658 sorted(set(isolate.config_variables) - set(config_variables))))
660 # A configuration is to be created with all the combinations of free
662 config = isolate.get_config(config_name)
663 dependencies = [f.replace('/', os.path.sep) for f in config.files]
664 return config.command, dependencies, config.read_only, config.isolate_dir