src/build/android/pylib/perf/test_runner.py

   1 # Copyright 2013 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 """Runs perf tests.
   6
   7 Our buildbot infrastructure requires each slave to run steps serially.
   8 This is sub-optimal for android, where these steps can run independently on
   9 multiple connected devices.
  10
  11 The buildbots will run this script multiple times per cycle:
- First: all steps listed in --steps will be executed in parallel using all
  13 connected devices. Step results will be pickled to disk. Each step has a unique
  14 name. The result code will be ignored if the step name is listed in
  15 --flaky-steps.
  16 The buildbot will treat this step as a regular step, and will not process any
  17 graph data.
  18
- Then, with --print-step STEP_NAME: at this stage, we'll simply print the file
  20 with the step results previously saved. The buildbot will then process the graph
  21 data accordingly.
  22
  23 The JSON steps file contains a dictionary in the format:
  24 { "version": int,
  25   "steps": {
  26     "foo": {
  27       "device_affinity": int,
  28       "cmd": "script_to_execute foo"
  29     },
  30     "bar": {
  31       "device_affinity": int,
  32       "cmd": "script_to_execute bar"
  33     }
  34   }
  35 }
  36
The JSON flaky steps file contains a list of step names whose results should
be ignored:
  39 [
  40   "step_name_foo",
  41   "step_name_bar"
  42 ]
  43
Note that script_to_execute must accept at least the following
option:
  46   --device: the serial number to be passed to all adb commands.
  47 """
  48
  49 import collections
  50 import datetime
  51 import json
  52 import logging
  53 import os
  54 import pickle
  55 import sys
  56 import threading
  57 import time
  58
  59 from pylib import cmd_helper
  60 from pylib import constants
  61 from pylib import forwarder
  62 from pylib.base import base_test_result
  63 from pylib.base import base_test_runner
  64 from pylib.device import device_errors
  65
  66
  67 def OutputJsonList(json_input, json_output):
  68   with file(json_input, 'r') as i:
  69     all_steps = json.load(i)
  70   step_names = all_steps['steps'].keys()
  71   with file(json_output, 'w') as o:
  72     o.write(json.dumps(step_names))
  73   return 0
  74
  75
  76 def PrintTestOutput(test_name):
  77   """Helper method to print the output of previously executed test_name.
  78
  79   Args:
  80     test_name: name of the test that has been previously executed.
  81
  82   Returns:
  83     exit code generated by the test step.
  84   """
  85   file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
  86   if not os.path.exists(file_name):
  87     logging.error('File not found %s', file_name)
  88     return 1
  89
  90   with file(file_name, 'r') as f:
  91     persisted_result = pickle.loads(f.read())
  92   logging.info('*' * 80)
  93   logging.info('Output from:')
  94   logging.info(persisted_result['cmd'])
  95   logging.info('*' * 80)
  96   print persisted_result['output']
  97
  98   return persisted_result['exit_code']
  99
 100
 101 def PrintSummary(test_names):
 102   logging.info('*' * 80)
 103   logging.info('Sharding summary')
 104   device_total_time = collections.defaultdict(int)
 105   for test_name in test_names:
 106     file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
 107     if not os.path.exists(file_name):
 108       logging.info('%s : No status file found', test_name)
 109       continue
 110     with file(file_name, 'r') as f:
 111       result = pickle.loads(f.read())
 112     logging.info('%s : exit_code=%d in %d secs at %s',
 113                  result['name'], result['exit_code'], result['total_time'],
 114                  result['device'])
 115     device_total_time[result['device']] += result['total_time']
 116   for device, device_time in device_total_time.iteritems():
 117     logging.info('Total for device %s : %d secs', device, device_time)
 118   logging.info('Total steps time: %d secs', sum(device_total_time.values()))
 119
 120
 121 class _HeartBeatLogger(object):
 122   # How often to print the heartbeat on flush().
 123   _PRINT_INTERVAL = 30.0
 124
 125   def __init__(self):
 126     """A file-like class for keeping the buildbot alive."""
 127     self._len = 0
 128     self._tick = time.time()
 129     self._stopped = threading.Event()
 130     self._timer = threading.Thread(target=self._runner)
 131     self._timer.start()
 132
 133   def _runner(self):
 134     while not self._stopped.is_set():
 135       self.flush()
 136       self._stopped.wait(_HeartBeatLogger._PRINT_INTERVAL)
 137
 138   def write(self, data):
 139     self._len += len(data)
 140
 141   def flush(self):
 142     now = time.time()
 143     if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL:
 144       self._tick = now
 145       print '--single-step output length %d' % self._len
 146       sys.stdout.flush()
 147
 148   def stop(self):
 149     self._stopped.set()
 150
 151
 152 class TestRunner(base_test_runner.BaseTestRunner):
 153   def __init__(self, test_options, device, shard_index, max_shard, tests,
 154       flaky_tests):
 155     """A TestRunner instance runs a perf test on a single device.
 156
 157     Args:
 158       test_options: A PerfOptions object.
 159       device: Device to run the tests.
 160       shard_index: the index of this device.
 161       max_shards: the maximum shard index.
 162       tests: a dict mapping test_name to command.
 163       flaky_tests: a list of flaky test_name.
 164     """
 165     super(TestRunner, self).__init__(device, None, 'Release')
 166     self._options = test_options
 167     self._shard_index = shard_index
 168     self._max_shard = max_shard
 169     self._tests = tests
 170     self._flaky_tests = flaky_tests
 171
 172   @staticmethod
 173   def _IsBetter(result):
 174     if result['actual_exit_code'] == 0:
 175       return True
 176     pickled = os.path.join(constants.PERF_OUTPUT_DIR,
 177                            result['name'])
 178     if not os.path.exists(pickled):
 179       return True
 180     with file(pickled, 'r') as f:
 181       previous = pickle.loads(f.read())
 182     return result['actual_exit_code'] < previous['actual_exit_code']
 183
 184   @staticmethod
 185   def _SaveResult(result):
 186     if TestRunner._IsBetter(result):
 187       with file(os.path.join(constants.PERF_OUTPUT_DIR,
 188                              result['name']), 'w') as f:
 189         f.write(pickle.dumps(result))
 190
 191   def _CheckDeviceAffinity(self, test_name):
 192     """Returns True if test_name has affinity for this shard."""
 193     affinity = (self._tests['steps'][test_name]['device_affinity'] %
 194                 self._max_shard)
 195     if self._shard_index == affinity:
 196       return True
 197     logging.info('Skipping %s on %s (affinity is %s, device is %s)',
 198                  test_name, self.device_serial, affinity, self._shard_index)
 199     return False
 200
 201   def _LaunchPerfTest(self, test_name):
 202     """Runs a perf test.
 203
 204     Args:
 205       test_name: the name of the test to be executed.
 206
 207     Returns:
 208       A tuple containing (Output, base_test_result.ResultType)
 209     """
 210     if not self._CheckDeviceAffinity(test_name):
 211       return '', base_test_result.ResultType.PASS
 212
 213     try:
 214       logging.warning('Unmapping device ports')
 215       forwarder.Forwarder.UnmapAllDevicePorts(self.device)
 216       self.device.old_interface.RestartAdbdOnDevice()
 217     except Exception as e:
 218       logging.error('Exception when tearing down device %s', e)
 219
 220     cmd = ('%s --device %s' %
 221            (self._tests['steps'][test_name]['cmd'],
 222             self.device_serial))
 223     logging.info('%s : %s', test_name, cmd)
 224     start_time = datetime.datetime.now()
 225
 226     timeout = 5400
 227     if self._options.no_timeout:
 228       timeout = None
 229     full_cmd = cmd
 230     if self._options.dry_run:
 231       full_cmd = 'echo %s' % cmd
 232
 233     logfile = sys.stdout
 234     if self._options.single_step:
 235       # Just print a heart-beat so that the outer buildbot scripts won't timeout
 236       # without response.
 237       logfile = _HeartBeatLogger()
 238     cwd = os.path.abspath(constants.DIR_SOURCE_ROOT)
 239     if full_cmd.startswith('src/'):
 240       cwd = os.path.abspath(os.path.join(constants.DIR_SOURCE_ROOT, os.pardir))
 241     try:
 242       exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout(
 243           full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile)
 244     except cmd_helper.TimeoutError as e:
 245       exit_code = -1
 246       output = str(e)
 247     finally:
 248       if self._options.single_step:
 249         logfile.stop()
 250     end_time = datetime.datetime.now()
 251     if exit_code is None:
 252       exit_code = -1
 253     logging.info('%s : exit_code=%d in %d secs at %s',
 254                  test_name, exit_code, (end_time - start_time).seconds,
 255                  self.device_serial)
 256
 257     if exit_code == 0:
 258       result_type = base_test_result.ResultType.PASS
 259     else:
 260       result_type = base_test_result.ResultType.FAIL
 261       # Since perf tests use device affinity, give the device a chance to
 262       # recover if it is offline after a failure. Otherwise, the master sharder
 263       # will remove it from the pool and future tests on this device will fail.
 264       try:
 265         self.device.WaitUntilFullyBooted(timeout=120)
 266       except device_errors.CommandTimeoutError as e:
 267         logging.error('Device failed to return after %s: %s' % (test_name, e))
 268
 269     actual_exit_code = exit_code
 270     if test_name in self._flaky_tests:
 271       # The exit_code is used at the second stage when printing the
 272       # test output. If the test is flaky, force to "0" to get that step green
 273       # whilst still gathering data to the perf dashboards.
 274       # The result_type is used by the test_dispatcher to retry the test.
 275       exit_code = 0
 276
 277     persisted_result = {
 278         'name': test_name,
 279         'output': output,
 280         'exit_code': exit_code,
 281         'actual_exit_code': actual_exit_code,
 282         'result_type': result_type,
 283         'total_time': (end_time - start_time).seconds,
 284         'device': self.device_serial,
 285         'cmd': cmd,
 286     }
 287     self._SaveResult(persisted_result)
 288
 289     return (output, result_type)
 290
 291   def RunTest(self, test_name):
 292     """Run a perf test on the device.
 293
 294     Args:
 295       test_name: String to use for logging the test result.
 296
 297     Returns:
 298       A tuple of (TestRunResults, retry).
 299     """
 300     _, result_type = self._LaunchPerfTest(test_name)
 301     results = base_test_result.TestRunResults()
 302     results.AddResult(base_test_result.BaseTestResult(test_name, result_type))
 303     retry = None
 304     if not results.DidRunPass():
 305       retry = test_name
 306     return results, retry