1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
"""A database of OWNERS files.

OWNERS files indicate who is allowed to approve changes in a specific directory
(or who is allowed to make changes without needing approval of another OWNER).
Note that all changes must still be reviewed by someone familiar with the code,
so you may need approval from both an OWNER and a reviewer in many cases.

The syntax of the OWNERS file is, roughly:

lines     := (\s* line? \s* "\n")*

line      := directive
          | "per-file" \s+ glob \s* "=" \s* directive
          | comment

directive := "set noparent"
          |  email_address
          |  "*"

glob      := [a-zA-Z0-9_*?-]+

comment   := "#" [^"\n"]*

Email addresses must follow the foo@bar.com short form (exact syntax given
in BASIC_EMAIL_REGEXP, below). Filename globs follow the simple unix
shell conventions, and relative and absolute paths are not allowed (i.e.,
globs only refer to the files in the current directory).

If a user's email is one of the email_addresses in the file, the user is
considered an "OWNER" for all files in the directory.

If the "per-file" directive is used, the line only applies to files in that
directory that match the filename glob specified.

If the "set noparent" directive is used, then only entries in this OWNERS file
apply to files in this directory; if the "set noparent" directive is not
used, then entries in OWNERS files in enclosing (upper) directories also
apply (up until a "set noparent" is encountered).

If "per-file glob=set noparent" is used, then global directives are ignored
for the glob, and only the "per-file" owners are used for files matching that
glob.

Examples for all of these combinations can be found in tests/owners_unittest.py.
"""
# If this is present by itself on a line, this means that everyone can review.
# The literal is the "*" wildcard that the directive grammar and the
# _add_entry error message refer to.
EVERYONE = '*'


# Recognizes 'X@Y' email addresses. Very simplistic.
BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
def _assert_is_collection(obj):
    """Assert that obj is a sized, iterable container and not a string.

    Strings are rejected explicitly: they are iterable and sized, so a
    caller passing a single path or email by mistake would otherwise be
    accepted and then iterated character by character.

    Args:
      obj: the value to check.

    Raises:
      AssertionError: if obj is a string, or is not both iterable and sized.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode.
    except NameError:
        string_types = str  # Python 3 has no basestring.
    assert not isinstance(obj, string_types)

    # The container ABCs live in collections.abc on Python 3; the aliases on
    # the top-level collections module were removed in 3.10, which made the
    # hasattr() guard below skip the check entirely. Fall back to the
    # top-level module for Python 2.
    try:
        from collections import abc as container_abcs
    except ImportError:
        import collections as container_abcs

    if (hasattr(container_abcs, 'Iterable') and
            hasattr(container_abcs, 'Sized')):
        assert (isinstance(obj, container_abcs.Iterable) and
                isinstance(obj, container_abcs.Sized))
class SyntaxErrorInOwnersFile(Exception):
    """Raised when a line of an OWNERS file cannot be interpreted.

    Attributes are stored individually so __str__ can format them; the
    same triple is also passed to Exception as a single tuple argument.
    """

    def __init__(self, path, lineno, msg):
        """Args:
          path: path of the OWNERS file containing the bad line.
          lineno: integer line number of the bad line (formatted with %d).
          msg: description of what is wrong with the line.
        """
        super(SyntaxErrorInOwnersFile, self).__init__((path, lineno, msg))
        self.path = path
        self.lineno = lineno
        self.msg = msg

    def __str__(self):
        return '%s:%d syntax error: %s' % (self.path, self.lineno, self.msg)
class Database(object):
    """A database of OWNERS files for a repository.

    This class allows you to find a suggested set of reviewers for a list
    of changed files, and see if a list of changed files is covered by a
    list of reviewers.
    """
def __init__(self, root, fopen, os_path, glob):
    """Creates an empty database; OWNERS files are read lazily later.

    Args:
      root: the path to the root of the Repository
      fopen: function callback to open a text file for reading
      os_path: module/object callback with fields for 'abspath', 'dirname',
          'exists', 'isabs', 'join', and 'relpath'
      glob: function callback to list entries in a directory matching a
          glob (i.e., glob.glob)
    """
    self.root = root
    self.fopen = fopen
    self.os_path = os_path
    self.glob = glob

    # Pick a default email regexp to use; callers can override as desired.
    self.email_regexp = re.compile(BASIC_EMAIL_REGEXP)

    # Mapping of owners to the paths they own.
    self.owned_by = {EVERYONE: set()}

    # Mapping of paths to authorized owners.
    self.owners_for = {}

    # Mapping reviewers to the preceding comment per file in the OWNERS
    # files: {owner: {path: comment}}.
    self.comments = {}

    # Set of paths that stop us from looking above them for owners.
    # (This is implicitly true for the root directory).
    self.stop_looking = set([''])
def reviewers_for(self, files, author):
    """Returns a suggested set of reviewers that will cover the files.

    files is a sequence of paths relative to (and under) self.root.
    If author is nonempty, we ensure it is not included in the set returned
    in order to avoid suggesting the author as a reviewer for their own
    changes.

    If the wildcard owner is the only suggestion, the placeholder
    '<anyone>' is returned in its place; if named owners were also
    suggested, the wildcard is dropped and only the names are returned.
    """
    self._check_paths(files)
    self.load_data_needed_for(files)
    suggested_owners = self._covering_set_of_owners_for(files, author)
    if EVERYONE in suggested_owners:
        if len(suggested_owners) > 1:
            suggested_owners.remove(EVERYONE)
        else:
            suggested_owners = set(['<anyone>'])
    return suggested_owners
def files_not_covered_by(self, files, reviewers):
    """Returns the set of files not owned by one of the reviewers.

    Args:
      files is a sequence of paths relative to (and under) self.root.
      reviewers is a sequence of strings matching self.email_regexp.
    """
    # Validate both inputs before any OWNERS data is loaded.
    self._check_paths(files)
    self._check_reviewers(reviewers)
    self.load_data_needed_for(files)

    covered = self._objs_covered_by(reviewers)
    missing = set()
    for path in files:
        if not self._is_obj_covered_by(path, covered):
            missing.add(path)
    return missing
def _check_paths(self, files):
    """Asserts that files is a collection of relative paths under self.root.

    Args:
      files: a non-string collection of paths.

    Raises:
      AssertionError: if files is not a collection, or any path is absolute
          or resolves to a location outside the repository root.
    """
    _assert_is_collection(files)

    root = self.os_path.abspath(self.root)
    # Compare whole path components, not raw characters: a plain
    # startswith(root) would accept '../repo-old/x' for a root of '/repo'
    # because '/repo-old/x' shares the character prefix '/repo'.
    # NOTE(review): os_path is caller-supplied and may not expose 'sep';
    # '/' is assumed in that case -- confirm for non-POSIX fakes.
    sep = getattr(self.os_path, 'sep', '/')
    root_prefix = root if root.endswith(sep) else root + sep

    def _is_under(f):
        path = self.os_path.abspath(self.os_path.join(root, f))
        return path == root or path.startswith(root_prefix)

    assert all(not self.os_path.isabs(f) and _is_under(f) for f in files)
def _check_reviewers(self, reviewers):
    """Asserts that reviewers is a collection of well-formed emails.

    Every entry must match self.email_regexp; an AssertionError is raised
    at the first one that does not.
    """
    _assert_is_collection(reviewers)
    looks_like_email = self.email_regexp.match
    for reviewer in reviewers:
        assert looks_like_email(reviewer)
def _objs_covered_by(self, reviewers):
    """Returns the set of paths owned by everyone or by any reviewer.

    Starts from the paths owned by the wildcard owner and unions in the
    paths owned by each reviewer; reviewers unknown to the database
    contribute nothing.
    """
    objs = self.owned_by[EVERYONE]
    for r in reviewers:
        # '|' builds a new set, so the sets stored in owned_by are never
        # mutated here.
        objs = objs | self.owned_by.get(r, set())
    return objs
def _stop_looking(self, objname):
    """Returns True if objname is recorded in self.stop_looking.

    Callers use this to decide when to stop walking up to parent
    directories.
    """
    barriers = self.stop_looking
    return objname in barriers
def _is_obj_covered_by(self, objname, covered_objs):
    """Returns True if objname or an enclosing directory is covered.

    Walks from objname up through its parent directories (via
    os_path.dirname) and succeeds at the first path found in covered_objs.
    The walk gives up at the first uncovered path for which
    self._stop_looking() is true.
    """
    candidate = objname
    while True:
        if candidate in covered_objs:
            return True
        if self._stop_looking(candidate):
            return False
        candidate = self.os_path.dirname(candidate)
def _enclosing_dir_with_owners(self, objname):
    """Returns the innermost enclosing directory that has an OWNERS file.

    The search starts at objname itself, so a path that has its own entry
    in self.owners_for is returned unchanged. If the walk reaches a path
    for which self._stop_looking() is true before finding an entry, that
    path is returned instead.
    """
    dirpath = objname
    while dirpath not in self.owners_for:
        if self._stop_looking(dirpath):
            break
        dirpath = self.os_path.dirname(dirpath)
    return dirpath
def load_data_needed_for(self, files):
    """Reads the OWNERS data needed to answer questions about files.

    For each file, walks upward from the file's directory, calling
    self._read_owners_in_dir() on each directory, until it reaches one
    already present in self.owners_for or one for which
    self._stop_looking() is true.

    Args:
      files: an iterable of paths relative to self.root.
    """
    for f in files:
        dirpath = self.os_path.dirname(f)
        while dirpath not in self.owners_for:
            self._read_owners_in_dir(dirpath)
            # Checked after reading: the read itself may have recorded
            # dirpath as a stopping point.
            if self._stop_looking(dirpath):
                break
            dirpath = self.os_path.dirname(dirpath)
def _read_owners_in_dir(self, dirpath):
    """Parses the OWNERS file in dirpath, if any, into the database.

    Does nothing when <root>/<dirpath>/OWNERS does not exist. Otherwise
    each non-blank, non-comment line is handled as one of:
      * 'set noparent'          -- records dirpath in self.stop_looking;
      * 'per-file <glob>=<dir>' -- applies the directive to every file in
                                   dirpath matching the glob;
      * anything else           -- passed to self._add_entry() as a
                                   directory-wide directive.
    The run of '#' lines most recently seen is passed along with each
    entry as its comment.

    Args:
      dirpath: directory to read, relative to self.root.

    Raises:
      SyntaxErrorInOwnersFile: for an unknown 'set' option, or a per-file
          glob containing a path separator; self._add_entry() may raise
          it as well.
    """
    owners_path = self.os_path.join(self.root, dirpath, 'OWNERS')
    if not self.os_path.exists(owners_path):
        return

    comment = []
    # NOTE(review): the rule for when `comment` is reset is not visible in
    # the reviewed excerpt; this restarts it at the first '#' line that
    # follows a non-comment line -- confirm against the intended behavior.
    in_comment = False
    owners_file = self.fopen(owners_path)
    try:
        for lineno, line in enumerate(owners_file, 1):
            line = line.strip()
            if line.startswith('#'):
                if not in_comment:
                    comment = []
                comment.append(line[1:].strip())
                in_comment = True
                continue
            if line == '':
                # The grammar makes the content of a line optional.
                continue
            in_comment = False

            if line == 'set noparent':
                self.stop_looking.add(dirpath)
                continue

            m = re.match('per-file (.+)=(.+)', line)
            if m:
                glob_string = m.group(1).strip()
                directive = m.group(2).strip()
                if '/' in glob_string or '\\' in glob_string:
                    # NOTE(review): the value interpolated into this
                    # message is not visible in the excerpt; the whole
                    # line is assumed -- confirm.
                    raise SyntaxErrorInOwnersFile(
                        owners_path, lineno,
                        'per-file globs cannot span directories or use '
                        'escapes: "%s"' % line)
                full_glob_string = self.os_path.join(
                    self.root, dirpath, glob_string)
                for match in self.glob(full_glob_string):
                    # Entries are keyed by path relative to the root.
                    baseline = self.os_path.relpath(match, self.root)
                    self._add_entry(baseline, directive, 'per-file line',
                                    owners_path, lineno, '\n'.join(comment))
                continue

            if line.startswith('set '):
                raise SyntaxErrorInOwnersFile(
                    owners_path, lineno,
                    'unknown option: "%s"' % line[4:].strip())

            # NOTE(review): the comment argument for directory-wide entries
            # is not visible in the excerpt; a space-joined comment is
            # assumed here -- confirm.
            self._add_entry(dirpath, line, 'line', owners_path, lineno,
                            ' '.join(comment))
    finally:
        # fopen is caller-supplied; close the handle when it supports it so
        # file objects are not left open.
        close = getattr(owners_file, 'close', None)
        if close is not None:
            close()
def _add_entry(self, path, directive,
               line_type, owners_path, lineno, comment):
    """Records a single parsed directive for path.

    Args:
      path: the directory or file the directive applies to.
      directive: 'set noparent', an email address, or the wildcard owner.
      line_type: description of the kind of line, used in the error text.
      owners_path: path of the OWNERS file being parsed (for errors).
      lineno: line number within owners_path (for errors).
      comment: comment text to associate with this owner and path.

    Raises:
      SyntaxErrorInOwnersFile: if directive is none of the accepted forms.
    """
    if directive == 'set noparent':
        self.stop_looking.add(path)
    elif self.email_regexp.match(directive) or directive == EVERYONE:
        self.comments.setdefault(directive, {})[path] = comment
        # Keep both directions of the owner <-> path mapping in sync.
        self.owned_by.setdefault(directive, set()).add(path)
        self.owners_for.setdefault(path, set()).add(directive)
    else:
        raise SyntaxErrorInOwnersFile(
            owners_path, lineno,
            ('%s is not a "set" directive, "*", '
             'or an email address: "%s"' % (line_type, directive)))
def _covering_set_of_owners_for(self, files, author):
    """Greedily picks owners until every relevant directory is covered.

    Each file is mapped to its enclosing directory with owners; then the
    lowest-cost owner is chosen repeatedly, and every directory that owner
    can cover is struck off, until none remain.

    Returns:
      The set of chosen owners.
    """
    uncovered = set()
    for path in files:
        uncovered.add(self._enclosing_dir_with_owners(path))

    candidates = self.all_possible_owners(uncovered, author)
    chosen = set()
    while uncovered:
        pick = self.lowest_cost_owner(candidates, uncovered)
        chosen.add(pick)
        handled = set(entry[0] for entry in candidates[pick])
        uncovered.difference_update(handled)
    return chosen
def all_possible_owners(self, dirs, author):
    """Returns a dict mapping each potential owner to a list of
    (dir, distance-from-dir) tuples, one per directory in dirs that the
    owner can cover; a distance of 1 is the lowest/closest possible
    distance (which makes the subsequent math easier).

    Args:
      dirs: the directories needing an owner.
      author: if nonempty, this owner is left out of the result.
    """
    owners_to_dirs = {}
    for current_dir in dirs:
        dirname = current_dir
        distance = 1
        while True:
            for owner in self.owners_for.get(dirname, []):
                if author and owner == author:
                    continue
                entries = owners_to_dirs.setdefault(owner, [])
                # If the same person is in multiple OWNERS files above a
                # given directory, only count the closest one.
                if not any(current_dir == el[0] for el in entries):
                    entries.append((current_dir, distance))
            if self._stop_looking(dirname):
                break
            dirname = self.os_path.dirname(dirname)
            distance += 1
    return owners_to_dirs
def total_costs_by_owner(all_possible_owners, dirs):
    """Returns a dict mapping each useful owner to a cost (lower is better).

    Only (dir, distance) entries whose dir is in dirs are counted; owners
    with no such entry are left out of the result.

    NOTE(review): this takes no self and is called as
    Database.total_costs_by_owner(...), so it is presumably bound as a
    static method -- confirm the decorator is present in the full file.

    Args:
      all_possible_owners: dict of owner -> list of (dir, distance) tuples.
      dirs: the directories still needing an owner.
    """
    # We want to minimize both the number of reviewers and the distance
    # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
    # an arbitrarily-selected scaling factor that seems to work well - it
    # will select one reviewer in the parent directory over three reviewers
    # in subdirs, but not one reviewer over just two.
    result = {}
    for owner in all_possible_owners:
        total_distance = 0
        num_directories_owned = 0
        for dirname, distance in all_possible_owners[owner]:
            if dirname in dirs:
                total_distance += distance
                num_directories_owned += 1
        if num_directories_owned:
            result[owner] = (total_distance /
                             pow(num_directories_owned, 1.75))
    return result
def lowest_cost_owner(all_possible_owners, dirs):
    """Returns the owner with the lowest total cost for covering dirs.

    NOTE(review): this takes no self and is invoked as
    self.lowest_cost_owner(...), so it is presumably bound as a static
    method -- confirm the decorator is present in the full file.

    Args:
      all_possible_owners: dict of owner -> list of (dir, distance) tuples.
      dirs: the directories still needing an owner.

    Raises:
      ValueError: if no owner covers any directory in dirs (min() of an
          empty sequence).
    """
    costs = Database.total_costs_by_owner(all_possible_owners, dirs)
    # Return the lowest cost owner. In the case of a tie, pick one randomly.
    # values() and a list are used instead of itervalues() and filter() so
    # this also works on Python 3, where filter() returns an iterator that
    # choice() cannot index.
    lowest_cost = min(costs.values())
    lowest_cost_owners = [owner for owner in costs
                          if costs[owner] == lowest_cost]
    return random.Random().choice(lowest_cost_owners)