4 Copyright 2013 Google Inc.
6 Use of this source code is governed by a BSD-style license that can be
7 found in the LICENSE file.
9 HTTP server for our HTML rebaseline viewer.
12 # System-level imports
28 # Imports from within Skia
29 import fix_pythonpath # must do this first
30 from pyutils import gs_utils
33 # Imports from local dir
35 # Note: we import results under a different name, to avoid confusion with the
36 # Server.results() property. See discussion at
37 # https://codereview.chromium.org/195943004/diff/1/gm/rebaseline_server/server.py#newcode44
38 import compare_configs
39 import compare_to_expectations
40 import download_actuals
42 import results as results_mod
44 PATHSPLIT_RE = re.compile('/([^/]+)/(.+)')
46 # A simple dictionary of file name extensions to MIME types. The empty string
47 # entry is used as the default when no extension was given or if the extension
48 # has no entry in this dictionary.
49 MIME_TYPE_MAP = {'': 'application/octet-stream',
53 'js': 'application/javascript',
54 'json': 'application/json'
57 # Keys that server.py uses to create the toplevel content header.
58 # NOTE: Keep these in sync with static/constants.js
59 KEY__EDITS__MODIFICATIONS = 'modifications'
60 KEY__EDITS__OLD_RESULTS_HASH = 'oldResultsHash'
61 KEY__EDITS__OLD_RESULTS_TYPE = 'oldResultsType'
63 DEFAULT_ACTUALS_DIR = results_mod.DEFAULT_ACTUALS_DIR
64 DEFAULT_GM_SUMMARIES_BUCKET = download_actuals.GM_SUMMARIES_BUCKET
65 DEFAULT_JSON_FILENAME = download_actuals.DEFAULT_JSON_FILENAME
68 PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
69 TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(PARENT_DIRECTORY))
70 # Directory, relative to PARENT_DIRECTORY, within which the server will serve
71 # out live results (not static files).
72 RESULTS_SUBDIR = 'results'
73 # Directory, relative to PARENT_DIRECTORY, within which the server will serve
75 STATIC_CONTENTS_SUBDIR = 'static'
76 # All of the GENERATED_*_SUBDIRS are relative to STATIC_CONTENTS_SUBDIR
77 GENERATED_HTML_SUBDIR = 'generated-html'
78 GENERATED_IMAGES_SUBDIR = 'generated-images'
79 GENERATED_JSON_SUBDIR = 'generated-json'
81 # How often (in seconds) clients should reload while waiting for initial
83 RELOAD_INTERVAL_UNTIL_READY = 10
86 results_mod.KEY__HEADER__RESULTS_FAILURES,
87 results_mod.KEY__HEADER__RESULTS_ALL,
89 # If --compare-configs is specified, compare these configs.
90 CONFIG_PAIRS_TO_COMPARE = [('8888', 'gpu')]
92 _HTTP_HEADER_CONTENT_LENGTH = 'Content-Length'
93 _HTTP_HEADER_CONTENT_TYPE = 'Content-Type'
95 _SERVER = None # This gets filled in by main()
def _run_command(args, directory):
    """Runs a command and returns stdout as a single string.

    Args:
      args: the command to run, as a list of arguments
      directory: directory within which to run the command

    Returns: stdout, as a string

    Raises an Exception if the command failed (exited with nonzero return
    code); the exception message includes the command's stderr.
    """
    logging.debug('_run_command: %s in directory %s', args, directory)
    proc = subprocess.Popen(args, cwd=directory,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for the process to exit, so
    # returncode is populated by the time we inspect it.
    (stdout, stderr) = proc.communicate()
    # Compare by value, not identity: "is not 0" only works by accident of
    # CPython's small-int caching and is flagged as a SyntaxWarning on 3.8+.
    if proc.returncode != 0:
        raise Exception('command "%s" failed in dir "%s": %s' %
                        (args, directory, stderr))
    return stdout
def _get_routable_ip_address():
    """Returns routable IP address of this host (the IP address of its network
    interface that would be used for most traffic, not its localhost
    interface).  See http://stackoverflow.com/a/166589

    The address is read back from a datagram socket after connect()ing it to
    a public address, i.e. it is whatever local address the OS bound for that
    destination.

    Raises socket.error if the socket cannot be created or connected.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        sock.connect(('8.8.8.8', 80))
        return sock.getsockname()[0]
    finally:
        # Release the descriptor even when connect()/getsockname() raises
        # (e.g. no route to the destination).
        sock.close()
def _create_index(file_path, config_pairs):
    """Creates an index file linking to all results available from this server.

    Prior to https://codereview.chromium.org/215503002 , we had a static
    index.html within our repo.  But now that the results may or may not
    include config comparisons, index.html needs to be generated differently
    depending on which results are included.

    TODO(epoger): Instead of including raw HTML within the Python code,
    consider restoring the index.html file as a template and using django (or
    similar) to fill in dynamic content.

    Args:
      file_path: path on local disk to write index to; any directory components
                 of this path that do not already exist will be created
      config_pairs: what pairs of configs (if any) we compare actual results
                    of; each pair is a 2-tuple of config names.  If None or
                    empty, no config-comparison section is written.
    """
    dir_path = os.path.dirname(file_path)
    # dirname() is '' for a bare filename; os.makedirs('') would raise, so only
    # create directories when there actually is a directory component.
    if dir_path and not os.path.isdir(dir_path):
        os.makedirs(dir_path)

    # Assemble the whole page first so the file is written in one call and is
    # not left half-written if formatting one of the entries fails.
    parts = [
        '<!DOCTYPE html><html>'
        '<head><title>rebaseline_server</title></head>'
        '<body><ul>'
    ]
    if SUMMARY_TYPES:
        parts.append('<li>Expectations vs Actuals</li><ul>')
        for summary_type in SUMMARY_TYPES:
            parts.append(
                '<li>'
                '<a href="/%s/view.html#/view.html?resultsToLoad=/%s/%s">'
                '%s</a></li>' % (
                    STATIC_CONTENTS_SUBDIR, RESULTS_SUBDIR,
                    summary_type, summary_type))
        parts.append('</ul>')
    if config_pairs:
        parts.append('<li>Comparing configs within actual results</li><ul>')
        for config_pair in config_pairs:
            parts.append('<li>%s vs %s:' % config_pair)
            for summary_type in SUMMARY_TYPES:
                # These links point at the per-pair JSON files under the
                # static generated-json directory.
                parts.append(
                    ' <a href="/%s/view.html#/view.html?'
                    'resultsToLoad=/%s/%s/%s-vs-%s_%s.json">%s</a>' % (
                        STATIC_CONTENTS_SUBDIR, STATIC_CONTENTS_SUBDIR,
                        GENERATED_JSON_SUBDIR, config_pair[0], config_pair[1],
                        summary_type, summary_type))
            parts.append('</li>')
        parts.append('</ul>')
    parts.append('</ul></body></html>')

    with open(file_path, 'w') as file_handle:
        file_handle.write(''.join(parts))
182 class Server(object):
183 """ HTTP server for our HTML rebaseline viewer. """
186 actuals_dir=DEFAULT_ACTUALS_DIR,
187 json_filename=DEFAULT_JSON_FILENAME,
188 gm_summaries_bucket=DEFAULT_GM_SUMMARIES_BUCKET,
189 port=DEFAULT_PORT, export=False, editable=True,
190 reload_seconds=0, config_pairs=None, builder_regex_list=None):
193 actuals_dir: directory under which we will check out the latest actual
195 json_filename: basename of the JSON summary file to load for each builder
196 gm_summaries_bucket: Google Storage bucket to download json_filename
197 files from; if None or '', don't fetch new actual-results files
198 at all, just compare to whatever files are already in actuals_dir
199 port: which TCP port to listen on for HTTP requests
200 export: whether to allow HTTP clients on other hosts to access this server
201 editable: whether HTTP clients are allowed to submit new baselines
202 reload_seconds: polling interval with which to check for new results;
203 if 0, don't check for new results at all
204 config_pairs: List of (string, string) tuples; for each tuple, compare
205 actual results of these two configs. If None or empty,
206 don't compare configs at all.
207 builder_regex_list: List of regular expressions specifying which builders
208 we will process. If None, process all builders.
210 self._actuals_dir = actuals_dir
211 self._json_filename = json_filename
212 self._gm_summaries_bucket = gm_summaries_bucket
214 self._export = export
215 self._editable = editable
216 self._reload_seconds = reload_seconds
217 self._config_pairs = config_pairs or []
218 self._builder_regex_list = builder_regex_list
220 file_path=os.path.join(
221 PARENT_DIRECTORY, STATIC_CONTENTS_SUBDIR, GENERATED_HTML_SUBDIR,
223 config_pairs=config_pairs)
225 # Reentrant lock that must be held whenever updating EITHER of:
227 # 2. the expected or actual results on local disk
228 self.results_rlock = threading.RLock()
229 # self._results will be filled in by calls to update_results()
234 """ Returns the most recently generated results, or None if we don't have
235 any valid results (update_results() has not completed yet). """
239 def is_exported(self):
240 """ Returns true iff HTTP clients on other hosts are allowed to access
245 def is_editable(self):
246 """ Returns true iff HTTP clients are allowed to submit new baselines. """
247 return self._editable
250 def reload_seconds(self):
251 """ Returns the result reload period in seconds, or 0 if we don't reload
253 return self._reload_seconds
255 def update_results(self, invalidate=False):
256 """ Create or update self._results, based on the latest expectations and
259 We hold self.results_rlock while we do this, to guarantee that no other
260 thread attempts to update either self._results or the underlying files at
264 invalidate: if True, invalidate self._results immediately upon entry;
265 otherwise, we will let readers see those results until we
268 with self.results_rlock:
271 if self._gm_summaries_bucket:
273 'Updating GM result summaries in %s from gm_summaries_bucket %s ...'
274 % (self._actuals_dir, self._gm_summaries_bucket))
276 # Clean out actuals_dir first, in case some builders have gone away
278 if os.path.isdir(self._actuals_dir):
279 shutil.rmtree(self._actuals_dir)
281 # Get the list of builders we care about.
282 all_builders = download_actuals.get_builders_list(
283 summaries_bucket=self._gm_summaries_bucket)
284 if self._builder_regex_list:
285 matching_builders = []
286 for builder in all_builders:
287 for regex in self._builder_regex_list:
288 if re.match(regex, builder):
289 matching_builders.append(builder)
290 break # go on to the next builder, no need to try more regexes
292 matching_builders = all_builders
294 # Download the JSON file for each builder we care about.
296 # TODO(epoger): When this is a large number of builders, we would be
297 # better off downloading them in parallel!
298 for builder in matching_builders:
299 gs_utils.download_file(
300 source_bucket=self._gm_summaries_bucket,
301 source_path=posixpath.join(builder, self._json_filename),
302 dest_path=os.path.join(self._actuals_dir, builder,
303 self._json_filename),
304 create_subdirs_if_needed=True)
306 # We only update the expectations dir if the server was run with a
307 # nonzero --reload argument; otherwise, we expect the user to maintain
308 # her own expectations as she sees fit.
310 # Because the Skia repo is hosted using git, and git does not
311 # support updating a single directory tree, we have to update the entire
314 # Because Skia uses depot_tools, we have to update using "gclient sync"
315 # instead of raw git commands.
317 # TODO(epoger): Fetch latest expectations in some other way.
318 # Eric points out that our official documentation recommends an
319 # unmanaged Skia checkout, so "gclient sync" will not bring down updated
320 # expectations from origin/master-- you'd have to do a "git pull" of
322 # However, the live rebaseline_server at
323 # http://skia-tree-status.appspot.com/redirect/rebaseline-server (which
324 # is probably the only user of the --reload flag!) uses a managed
325 # checkout, so "gclient sync" works in that case.
326 # Probably the best idea is to avoid all of this nonsense by fetching
327 # updated expectations into a temp directory, and leaving the rest of
328 # the checkout alone. This could be done using "git show", or by
329 # downloading individual expectation JSON files from
330 # skia.googlesource.com .
331 if self._reload_seconds:
333 'Updating expected GM results in %s by syncing Skia repo ...' %
334 compare_to_expectations.DEFAULT_EXPECTATIONS_DIR)
335 _run_command(['gclient', 'sync'], TRUNK_DIRECTORY)
337 self._results = compare_to_expectations.ExpectationComparisons(
338 actuals_root=self._actuals_dir,
339 generated_images_root=os.path.join(
340 PARENT_DIRECTORY, STATIC_CONTENTS_SUBDIR,
341 GENERATED_IMAGES_SUBDIR),
342 diff_base_url=posixpath.join(
343 os.pardir, STATIC_CONTENTS_SUBDIR, GENERATED_IMAGES_SUBDIR),
344 builder_regex_list=self._builder_regex_list)
346 json_dir = os.path.join(
347 PARENT_DIRECTORY, STATIC_CONTENTS_SUBDIR, GENERATED_JSON_SUBDIR)
348 if not os.path.isdir(json_dir):
349 os.makedirs(json_dir)
351 for config_pair in self._config_pairs:
352 config_comparisons = compare_configs.ConfigComparisons(
354 actuals_root=self._actuals_dir,
355 generated_images_root=os.path.join(
356 PARENT_DIRECTORY, STATIC_CONTENTS_SUBDIR,
357 GENERATED_IMAGES_SUBDIR),
358 diff_base_url=posixpath.join(
359 os.pardir, GENERATED_IMAGES_SUBDIR),
360 builder_regex_list=self._builder_regex_list)
361 for summary_type in SUMMARY_TYPES:
363 config_comparisons.get_packaged_results_of_type(
364 results_type=summary_type),
366 json_dir, '%s-vs-%s_%s.json' % (
367 config_pair[0], config_pair[1], summary_type)))
def _result_loader(self, reload_seconds=0):
    """ Load results once, then optionally keep refreshing them forever.

    Args:
      reload_seconds: integer; if nonzero, reload results at this interval
          (in which case, this method will never return!)
    """
    self.update_results()
    logging.info('Initial results loaded. Ready for requests on %s' % self._url)

    # One-shot mode: nothing more to do after the initial load.
    if not reload_seconds:
        return

    # Polling mode: sleep, refresh, repeat until the process exits.
    while True:
        time.sleep(reload_seconds)
        self.update_results()
384 arg_tuple = (self._reload_seconds,) # start_new_thread needs a tuple,
385 # even though it holds just one param
386 thread.start_new_thread(self._result_loader, arg_tuple)
389 server_address = ('', self._port)
390 host = _get_routable_ip_address()
392 logging.warning('Running with combination of "export" and "editable" '
393 'flags. Users on other machines will '
394 'be able to modify your GM expectations!')
397 server_address = (host, self._port)
398 http_server = BaseHTTPServer.HTTPServer(server_address, HTTPRequestHandler)
399 self._url = 'http://%s:%d' % (host, http_server.server_port)
400 logging.info('Listening for requests on %s' % self._url)
401 http_server.serve_forever()
404 class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
405 """ HTTP request handlers for various types of queries this server knows
406 how to handle (static HTML and Javascript, expected/actual results, etc.)
410 Handles all GET requests, forwarding them to the appropriate
413 If we see any Exceptions, return a 404. This fixes http://skbug.com/2147
416 logging.debug('do_GET: path="%s"' % self.path)
417 if self.path == '' or self.path == '/' or self.path == '/index.html' :
418 self.redirect_to('/%s/%s/index.html' % (
419 STATIC_CONTENTS_SUBDIR, GENERATED_HTML_SUBDIR))
421 if self.path == '/favicon.ico' :
422 self.redirect_to('/%s/favicon.ico' % STATIC_CONTENTS_SUBDIR)
425 # All requests must be of this form:
426 # /dispatcher/remainder
427 # where 'dispatcher' indicates which do_GET_* dispatcher to run
428 # and 'remainder' is the remaining path sent to that dispatcher.
429 normpath = posixpath.normpath(self.path)
430 (dispatcher_name, remainder) = PATHSPLIT_RE.match(normpath).groups()
432 RESULTS_SUBDIR: self.do_GET_results,
433 STATIC_CONTENTS_SUBDIR: self.do_GET_static,
435 dispatcher = dispatchers[dispatcher_name]
436 dispatcher(remainder)
441 def do_GET_results(self, results_type):
442 """ Handle a GET request for GM results.
445 results_type: string indicating which set of results to return;
446 must be one of the results_mod.RESULTS_* constants
448 logging.debug('do_GET_results: sending results of type "%s"' % results_type)
449 # Since we must make multiple calls to the ExpectationComparisons object,
450 # grab a reference to it in case it is updated to point at a new
451 # ExpectationComparisons object within another thread.
453 # TODO(epoger): Rather than using a global variable for the handler
454 # to refer to the Server object, make Server a subclass of
455 # HTTPServer, and then it could be available to the handler via
456 # the handler's .server instance variable.
457 results_obj = _SERVER.results
459 response_dict = results_obj.get_packaged_results_of_type(
460 results_type=results_type, reload_seconds=_SERVER.reload_seconds,
461 is_editable=_SERVER.is_editable, is_exported=_SERVER.is_exported)
463 now = int(time.time())
465 imagepairset.KEY__ROOT__HEADER: {
466 results_mod.KEY__HEADER__SCHEMA_VERSION: (
467 results_mod.VALUE__HEADER__SCHEMA_VERSION),
468 results_mod.KEY__HEADER__IS_STILL_LOADING: True,
469 results_mod.KEY__HEADER__TIME_UPDATED: now,
470 results_mod.KEY__HEADER__TIME_NEXT_UPDATE_AVAILABLE: (
471 now + RELOAD_INTERVAL_UNTIL_READY),
474 self.send_json_dict(response_dict)
def do_GET_static(self, path):
    """ Handle a GET request for a file under STATIC_CONTENTS_SUBDIR .
    Only allow serving of files within STATIC_CONTENTS_SUBDIR that is a
    filesystem sibling of this script.

    Requests that resolve (after symlink and '..' resolution) to anything
    outside that directory are logged and answered with a 404.

    Args:
      path: path to file (within STATIC_CONTENTS_SUBDIR) to retrieve
    """
    # Strip arguments ('?resultsToLoad=all') from the path
    path = urlparse.urlparse(path).path

    logging.debug('do_GET_static: sending file "%s"' % path)
    static_dir = os.path.realpath(os.path.join(
        PARENT_DIRECTORY, STATIC_CONTENTS_SUBDIR))
    full_path = os.path.realpath(os.path.join(static_dir, path))

    # A bare startswith(static_dir) would also accept sibling directories that
    # merely share the prefix (e.g. "<static_dir>-old/secret"), so require the
    # match to end on a path-separator boundary.
    static_prefix = os.path.join(static_dir, '')
    if full_path == static_dir or full_path.startswith(static_prefix):
        self.send_file(full_path)
    else:
        logging.error(
            'Attempted do_GET_static() of path [%s] outside of static dir [%s]'
            % (full_path, static_dir))
        self.send_error(404)
500 """ Handles all POST requests, forwarding them to the appropriate
501 do_POST_* dispatcher. """
502 # All requests must be of this form:
504 # where 'dispatcher' indicates which do_POST_* dispatcher to run.
505 logging.debug('do_POST: path="%s"' % self.path)
506 normpath = posixpath.normpath(self.path)
508 '/edits': self.do_POST_edits,
511 dispatcher = dispatchers[normpath]
513 self.send_response(200)
def do_POST_edits(self):
    """ Handle a POST request with modifications to GM expectations.

    The request body must be JSON (Content-Type exactly
    'application/json;charset=UTF-8') shaped like this:

    {
      KEY__EDITS__OLD_RESULTS_TYPE: 'all',    # type of results that the client
                                              # loaded and then modified
      KEY__EDITS__OLD_RESULTS_HASH: 39850913, # hash of results when the client
                                              # loaded them (ensures that the
                                              # client and server apply
                                              # modifications to the same base)
      KEY__EDITS__MODIFICATIONS: [
        # as needed by compare_to_expectations.edit_expectations()
      ],
    }

    Raises an Exception if there were any problems.
    """
    if not _SERVER.is_editable:
        raise Exception('this server is not running in --editable mode')

    request_type = self.headers[_HTTP_HEADER_CONTENT_TYPE]
    if request_type != 'application/json;charset=UTF-8':
        raise Exception('unsupported %s [%s]' % (
            _HTTP_HEADER_CONTENT_TYPE, request_type))

    body_size = int(self.headers[_HTTP_HEADER_CONTENT_LENGTH])
    data = json.loads(self.rfile.read(body_size))
    logging.debug('do_POST_edits: received new GM expectations data [%s]' %
                  data)

    # Update the results on disk with the information we received from the
    # client.
    # We must hold _SERVER.results_rlock while we do this, to guarantee that
    # no other thread updates expectations (from the Skia repo) while we are
    # updating them (using the info we received from the client).
    with _SERVER.results_rlock:
        results_type = data[KEY__EDITS__OLD_RESULTS_TYPE]
        current_results = _SERVER.results.get_results_of_type(results_type)
        current_pairs = current_results[imagepairset.KEY__ROOT__IMAGEPAIRS]
        current_hash = str(hash(repr(current_pairs)))
        # Refuse edits that were computed against a stale view of the results.
        if current_hash != data[KEY__EDITS__OLD_RESULTS_HASH]:
            raise Exception('results of type "%s" changed while the client was '
                            'making modifications. The client should reload the '
                            'results and submit the modifications again.' %
                            results_type)
        _SERVER.results.edit_expectations(data[KEY__EDITS__MODIFICATIONS])

    # Read the updated results back from disk.
    # We can do this in a separate thread; we should return our success message
    # to the UI as soon as possible.
    thread.start_new_thread(_SERVER.update_results, (True,))
def redirect_to(self, url):
    """ Answer the current request with a 301 redirect.

    Args:
      url: URL to redirect the HTTP client to
    """
    status = 301
    self.send_response(status)
    self.send_header('Location', url)
    # No body follows; finishing the headers completes the response.
    self.end_headers()
def send_file(self, path):
    """ Send the contents of the file at this path, with a mimetype based
    on the filename extension.  Responds with a 404 if path is not a file.

    Args:
      path: path of file whose contents to send to the HTTP client
    """
    # Anything that is not a regular file gets a 404.
    if not os.path.isfile(path):
        self.send_error(404)
        return

    # splitext() yields '' or '.ext'; dropping the first character leaves the
    # bare extension (or '' when there is none).
    suffix = os.path.splitext(path)[1][1:]

    # Unknown and empty extensions fall back to the '' entry of the map.
    content_type = MIME_TYPE_MAP.get(suffix, MIME_TYPE_MAP[''])

    with open(path, 'rb') as source:
        self.send_response(200)
        self.send_header('Content-type', content_type)
        self.end_headers()
        self.wfile.write(source.read())
def send_json_dict(self, json_dict):
    """ Send the contents of this dictionary in JSON format, with a JSON
    mimetype.

    Args:
      json_dict: dictionary to send
    """
    self.send_response(200)
    self.send_header('Content-type', 'application/json')
    self.end_headers()
    # Serialize straight onto the response stream.
    out = self.wfile
    json.dump(json_dict, out)
623 logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
624 datefmt='%m/%d/%Y %H:%M:%S',
626 parser = argparse.ArgumentParser()
627 parser.add_argument('--actuals-dir',
628 help=('Directory into which we will check out the latest '
629 'actual GM results. If this directory does not '
630 'exist, it will be created. Defaults to %(default)s'),
631 default=DEFAULT_ACTUALS_DIR)
632 # TODO(epoger): Before https://codereview.chromium.org/310093003 ,
633 # when this tool downloaded the JSON summaries from skia-autogen,
634 # it had an --actuals-revision the caller could specify to download
635 # actual results as of a specific point in time. We should add similar
636 # functionality when retrieving the summaries from Google Storage.
637 parser.add_argument('--builders', metavar='BUILDER_REGEX', nargs='+',
638 help=('Only process builders matching these regular '
639 'expressions. If unspecified, process all '
641 parser.add_argument('--compare-configs', action='store_true',
642 help=('In addition to generating differences between '
643 'expectations and actuals, also generate '
644 'differences between these config pairs: '
645 + str(CONFIG_PAIRS_TO_COMPARE)))
646 parser.add_argument('--editable', action='store_true',
647 help=('Allow HTTP clients to submit new baselines.'))
648 parser.add_argument('--export', action='store_true',
649 help=('Instead of only allowing access from HTTP clients '
650 'on localhost, allow HTTP clients on other hosts '
651 'to access this server. WARNING: doing so will '
652 'allow users on other hosts to modify your '
653 'GM expectations, if combined with --editable.'))
654 parser.add_argument('--gm-summaries-bucket',
655 help=('Google Cloud Storage bucket to download '
656 'JSON_FILENAME files from. '
657 'Defaults to %(default)s ; if set to '
658 'empty string, just compare to actual-results '
659 'already found in ACTUALS_DIR.'),
660 default=DEFAULT_GM_SUMMARIES_BUCKET)
661 parser.add_argument('--json-filename',
662 help=('JSON summary filename to read for each builder; '
663 'defaults to %(default)s.'),
664 default=DEFAULT_JSON_FILENAME)
665 parser.add_argument('--port', type=int,
666 help=('Which TCP port to listen on for HTTP requests; '
667 'defaults to %(default)s'),
668 default=DEFAULT_PORT)
669 parser.add_argument('--reload', type=int,
670 help=('How often (a period in seconds) to update the '
671 'results. If specified, both expected and actual '
672 'results will be updated by running "gclient sync" '
673 'on your Skia checkout as a whole. '
674 'By default, we do not reload at all, and you '
675 'must restart the server to pick up new data.'),
677 args = parser.parse_args()
678 if args.compare_configs:
679 config_pairs = CONFIG_PAIRS_TO_COMPARE
684 _SERVER = Server(actuals_dir=args.actuals_dir,
685 json_filename=args.json_filename,
686 gm_summaries_bucket=args.gm_summaries_bucket,
687 port=args.port, export=args.export, editable=args.editable,
688 reload_seconds=args.reload, config_pairs=config_pairs,
689 builder_regex_list=args.builders)
693 if __name__ == '__main__':