src/third_party/chromite/scripts/gather_builder_stats.py

   1 # Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 """Script for gathering stats from builder runs."""
   6
   7 from __future__ import division
   8 from __future__ import print_function
   9
  10 import collections
  11 import datetime
  12 import logging
  13 import numpy
  14 import os
  15 import re
  16 import sys
  17
  18 from chromite.cbuildbot import cbuildbot_config
  19 from chromite.cbuildbot import metadata_lib
  20 from chromite.cbuildbot import constants
  21 from chromite.lib import commandline
  22 from chromite.lib import cros_build_lib
  23 from chromite.lib import gdata_lib
  24 from chromite.lib import graphite
  25 from chromite.lib import gs
  26 from chromite.lib import table
  27
  28
  29 # Useful config targets.
  30 CQ_MASTER = constants.CQ_MASTER
  31 PFQ_MASTER = 'x86-generic-chromium-pfq'
  32
  33 # Useful google storage locations.
  34 PRE_CQ_GROUP_GS_LOCATION = constants.PRE_CQ_GROUP_GS_LOCATION
  35
  36 # Bot types
  37 CQ = constants.CQ
  38 PRE_CQ = constants.PRE_CQ
  39 PFQ = constants.PFQ_TYPE
  40
  41 # Number of parallel processes used when uploading/downloading GS files.
  42 MAX_PARALLEL = 40
  43
  44 # The graphite graphs use seconds since epoch start as time value.
  45 EPOCH_START = metadata_lib.EPOCH_START
  46
  47 # Formats we like for output.
  48 NICE_DATE_FORMAT = metadata_lib.NICE_DATE_FORMAT
  49 NICE_TIME_FORMAT = metadata_lib.NICE_TIME_FORMAT
  50 NICE_DATETIME_FORMAT = metadata_lib.NICE_DATETIME_FORMAT
  51
  52 # Spreadsheet keys
  53 # CQ master and slaves both use the same spreadsheet
  54 CQ_SS_KEY = '0AsXDKtaHikmcdElQWVFuT21aMlFXVTN5bVhfQ2ptVFE'
  55 PFQ_SS_KEY = '0AhFPeDq6pmwxdDdrYXk3cnJJV05jN3Zja0s5VjFfNlE'
  56
  57
  58 # These are the preferred base URLs we use to canonicalize bugs/CLs.
  59 BUGANIZER_BASE_URL = 'b/'
  60 GUTS_BASE_URL = 't/'
  61 CROS_BUG_BASE_URL = 'crbug.com/'
  62 INTERNAL_CL_BASE_URL = 'crosreview.com/i/'
  63 EXTERNAL_CL_BASE_URL = 'crosreview.com/'
  64
  65
  66 class GatherStatsError(Exception):
  67   """Base exception class for exceptions in this module."""
  68
  69
  70 class DataError(GatherStatsError):
  71   """Any exception raised when an error occured while collectring data."""
  72
  73
  74 class SpreadsheetError(GatherStatsError):
  75   """Raised when there is a problem with the stats spreadsheet."""
  76
  77
  78 class BadData(DataError):
  79   """Raised when a json file is still running."""
  80
  81
  82 class NoDataError(DataError):
  83   """Returned if a manifest file does not exist."""
  84
  85
  86 def _SendToCarbon(builds, token_funcs):
  87   """Send data for |builds| to Carbon/Graphite according to |token_funcs|.
  88
  89   Args:
  90     builds: List of BuildData objects.
  91     token_funcs: List of functors that each take a BuildData object as the only
  92       argument and return a string.  Each line of data to send to Carbon is
  93       constructed by taking the strings returned from these functors and
  94       concatenating them using single spaces.
  95   """
  96   lines = [' '.join([str(func(b)) for func in token_funcs]) for b in builds]
  97   cros_build_lib.Info('Sending %d lines to Graphite now.', len(lines))
  98   graphite.SendToCarbon(lines)
  99
 100
 101 # TODO(dgarrett): Discover list from Json. Will better track slave changes.
 102 def _GetSlavesOfMaster(master_target):
 103   """Returns list of slave config names for given master config name.
 104
 105   Args:
 106     master_target: Name of master config target.
 107
 108   Returns:
 109     List of names of slave config targets.
 110   """
 111   master_config = cbuildbot_config.config[master_target]
 112   slave_configs = cbuildbot_config.GetSlavesForMaster(master_config)
 113   return sorted(slave_config.name for slave_config in slave_configs)
 114
 115
 116 class StatsTable(table.Table):
 117   """Stats table for any specific target on a waterfall."""
 118
 119   LINK_ROOT = ('https://uberchromegw.corp.google.com/i/%(waterfall)s/builders/'
 120                '%(target)s/builds')
 121
 122   @staticmethod
 123   def _SSHyperlink(link, text):
 124     return '=HYPERLINK("%s", "%s")' % (link, text)
 125
 126   def __init__(self, target, waterfall, columns):
 127     super(StatsTable, self).__init__(columns, target)
 128     self.target = target
 129     self.waterfall = waterfall
 130
 131   def GetBuildLink(self, build_number):
 132     target = self.target.replace(' ', '%20')
 133     link = self.LINK_ROOT % {'waterfall': self.waterfall, 'target': target}
 134     link += '/%s' % build_number
 135     return link
 136
 137   def GetBuildSSLink(self, build_number):
 138     link = self.GetBuildLink(build_number)
 139     return self._SSHyperlink(link, 'build %s' % build_number)
 140
 141
 142 class SpreadsheetMasterTable(StatsTable):
 143   """Stats table for master builder that puts results in a spreadsheet."""
 144   # Bump this number whenever this class adds new data columns, or changes
 145   # the values of existing data columns.
 146   SHEETS_VERSION = 2
 147
 148   # These must match up with the column names on the spreadsheet.
 149   COL_BUILD_NUMBER = 'build number'
 150   COL_BUILD_LINK = 'build link'
 151   COL_STATUS = 'status'
 152   COL_START_DATETIME = 'start datetime'
 153   COL_RUNTIME_MINUTES = 'runtime minutes'
 154   COL_WEEKDAY = 'weekday'
 155   COL_CHROMEOS_VERSION = 'chromeos version'
 156   COL_CHROME_VERSION = 'chrome version'
 157   COL_FAILED_STAGES = 'failed stages'
 158   COL_FAILURE_MESSAGE = 'failure message'
 159
 160   # It is required that the ID_COL be an integer value.
 161   ID_COL = COL_BUILD_NUMBER
 162
 163   COLUMNS = (
 164      COL_BUILD_NUMBER,
 165      COL_BUILD_LINK,
 166      COL_STATUS,
 167      COL_START_DATETIME,
 168      COL_RUNTIME_MINUTES,
 169      COL_WEEKDAY,
 170      COL_CHROMEOS_VERSION,
 171      COL_CHROME_VERSION,
 172      COL_FAILED_STAGES,
 173      COL_FAILURE_MESSAGE,
 174   )
 175
 176   def __init__(self, target, waterfall, columns=None):
 177     columns = columns or []
 178     columns = list(SpreadsheetMasterTable.COLUMNS) + columns
 179     super(SpreadsheetMasterTable, self).__init__(target,
 180                                                  waterfall,
 181                                                  columns)
 182
 183     self._slaves = []
 184
 185   def _CreateAbortedRowDict(self, build_number):
 186     """Create a row dict to represent an aborted run of |build_number|."""
 187     return {
 188         self.COL_BUILD_NUMBER: str(build_number),
 189         self.COL_BUILD_LINK: self.GetBuildSSLink(build_number),
 190         self.COL_STATUS: 'aborted',
 191     }
 192
 193   def AppendGapRow(self, build_number):
 194     """Append a row to represent a missing run of |build_number|."""
 195     self.AppendRow(self._CreateAbortedRowDict(build_number))
 196
 197   def AppendBuildRow(self, build_data):
 198     """Append a row from the given |build_data|.
 199
 200     Args:
 201       build_data: A BuildData object.
 202     """
 203     # First see if any build number gaps are in the table, and if so fill
 204     # them in.  This happens when a CQ run is aborted and never writes metadata.
 205     num_rows = self.GetNumRows()
 206     if num_rows:
 207       last_row = self.GetRowByIndex(num_rows - 1)
 208       last_build_number = int(last_row[self.COL_BUILD_NUMBER])
 209
 210       for bn in range(build_data.build_number + 1, last_build_number):
 211         self.AppendGapRow(bn)
 212
 213     row = self._GetBuildRow(build_data)
 214
 215     #Use a separate column for each slave.
 216     slaves = build_data.slaves
 217     for slave_name in slaves:
 218       # This adds the slave to our local data, but doesn't add a missing
 219       # column to the spreadsheet itself.
 220       self._EnsureSlaveKnown(slave_name)
 221
 222     # Now add the finished row to this table.
 223     self.AppendRow(row)
 224
 225   def _GetBuildRow(self, build_data):
 226     """Fetch a row dictionary from |build_data|
 227
 228     Returns:
 229       A dictionary of the form {column_name : value}
 230     """
 231     build_number = build_data.build_number
 232     build_link = self.GetBuildSSLink(build_number)
 233
 234     # For datetime.weekday(), 0==Monday and 6==Sunday.
 235     is_weekday = build_data.start_datetime.weekday() in range(5)
 236
 237     row = {
 238         self.COL_BUILD_NUMBER: str(build_number),
 239         self.COL_BUILD_LINK: build_link,
 240         self.COL_STATUS: build_data.status,
 241         self.COL_START_DATETIME: build_data.start_datetime_str,
 242         self.COL_RUNTIME_MINUTES: str(build_data.runtime_minutes),
 243         self.COL_WEEKDAY: str(is_weekday),
 244         self.COL_CHROMEOS_VERSION: build_data.chromeos_version,
 245         self.COL_CHROME_VERSION: build_data.chrome_version,
 246         self.COL_FAILED_STAGES: ' '.join(build_data.GetFailedStages()),
 247         self.COL_FAILURE_MESSAGE: build_data.failure_message,
 248     }
 249
 250     slaves = build_data.slaves
 251     for slave_name in slaves:
 252       slave = slaves[slave_name]
 253       slave_url = slave.get('dashboard_url')
 254
 255       # For some reason status in slaves uses pass/fail instead of
 256       # passed/failed used elsewhere in metadata.
 257       translate_dict = {'fail': 'failed', 'pass': 'passed'}
 258       slave_status = translate_dict.get(slave['status'], slave['status'])
 259
 260       # Bizarrely, dashboard_url is not always set for slaves that pass.
 261       # Only sometimes.  crbug.com/350939.
 262       if slave_url:
 263         row[slave_name] = self._SSHyperlink(slave_url, slave_status)
 264       else:
 265         row[slave_name] = slave_status
 266
 267     return row
 268
 269   def _EnsureSlaveKnown(self, slave_name):
 270     """Ensure that a slave builder name is known.
 271
 272     Args:
 273       slave_name: The name of the slave builder (aka spreadsheet column name).
 274     """
 275     if not self.HasColumn(slave_name):
 276       self._slaves.append(slave_name)
 277       self.AppendColumn(slave_name)
 278
 279   def GetSlaves(self):
 280     """Get the list of slave builders which has been discovered so far.
 281
 282     This list is only fully populated when all row data has been fully
 283     populated.
 284
 285     Returns:
 286       List of column names for slave builders.
 287     """
 288     return self._slaves[:]
 289
 290
 291 class PFQMasterTable(SpreadsheetMasterTable):
 292   """Stats table for the CQ Master."""
 293   SS_KEY = PFQ_SS_KEY
 294
 295   WATERFALL = 'chromeos'
 296   # Must match up with name in waterfall.
 297   TARGET = 'x86-generic nightly chromium PFQ'
 298
 299   WORKSHEET_NAME = 'PFQMasterData'
 300
 301   # Bump this number whenever this class adds new data columns, or changes
 302   # the values of existing data columns.
 303   SHEETS_VERSION = SpreadsheetMasterTable.SHEETS_VERSION + 1
 304
 305   # These columns are in addition to those inherited from
 306   # SpreadsheetMasterTable
 307   COLUMNS = ()
 308
 309   def __init__(self):
 310     super(PFQMasterTable, self).__init__(PFQMasterTable.TARGET,
 311                                          PFQMasterTable.WATERFALL,
 312                                          list(PFQMasterTable.COLUMNS))
 313
 314
 315 class CQMasterTable(SpreadsheetMasterTable):
 316   """Stats table for the CQ Master."""
 317   WATERFALL = 'chromeos'
 318   TARGET = 'CQ master' # Must match up with name in waterfall.
 319
 320   WORKSHEET_NAME = 'CQMasterData'
 321
 322   # Bump this number whenever this class adds new data columns, or changes
 323   # the values of existing data columns.
 324   SHEETS_VERSION = SpreadsheetMasterTable.SHEETS_VERSION + 2
 325
 326   COL_CL_COUNT = 'cl count'
 327   COL_CL_SUBMITTED_COUNT = 'cls submitted'
 328   COL_CL_REJECTED_COUNT = 'cls rejected'
 329
 330   # These columns are in addition to those inherited from
 331   # SpreadsheetMasterTable
 332   COLUMNS = (
 333       COL_CL_COUNT,
 334       COL_CL_SUBMITTED_COUNT,
 335       COL_CL_REJECTED_COUNT,
 336   )
 337
 338   def __init__(self):
 339     super(CQMasterTable, self).__init__(CQMasterTable.TARGET,
 340                                         CQMasterTable.WATERFALL,
 341                                         list(CQMasterTable.COLUMNS))
 342
 343   def _GetBuildRow(self, build_data):
 344     """Fetch a row dictionary from |build_data|
 345
 346     Returns:
 347       A dictionary of the form {column_name : value}
 348     """
 349     row = super(CQMasterTable, self)._GetBuildRow(build_data)
 350     row[self.COL_CL_COUNT] = str(build_data.count_changes)
 351
 352     cl_actions = [metadata_lib.CLActionTuple(*x)
 353                   for x in build_data['cl_actions']]
 354     submitted_cl_count = len([x for x in cl_actions if
 355                               x.action == constants.CL_ACTION_SUBMITTED])
 356     rejected_cl_count = len([x for x in cl_actions
 357                              if x.action == constants.CL_ACTION_KICKED_OUT])
 358     row[self.COL_CL_SUBMITTED_COUNT] = str(submitted_cl_count)
 359     row[self.COL_CL_REJECTED_COUNT] = str(rejected_cl_count)
 360     return row
 361
 362
 363 class SSUploader(object):
 364   """Uploads data from table object to Google spreadsheet."""
 365
 366   __slots__ = ('_creds',          # gdata_lib.Creds object
 367                '_scomm',          # gdata_lib.SpreadsheetComm object
 368                'ss_key',          # Spreadsheet key string
 369                )
 370
 371   SOURCE = 'Gathered from builder metadata'
 372   HYPERLINK_RE = re.compile(r'=HYPERLINK\("[^"]+", "([^"]+)"\)')
 373   DATETIME_FORMATS = ('%m/%d/%Y %H:%M:%S', NICE_DATETIME_FORMAT)
 374
 375   def __init__(self, creds, ss_key):
 376     self._creds = creds
 377     self.ss_key = ss_key
 378     self._scomm = None
 379
 380   @classmethod
 381   def _ValsEqual(cls, val1, val2):
 382     """Compare two spreadsheet values and return True if they are the same.
 383
 384     This is non-trivial because of the automatic changes that Google Sheets
 385     does to values.
 386
 387     Args:
 388       val1: New or old spreadsheet value to compare.
 389       val2: New or old spreadsheet value to compare.
 390
 391     Returns:
 392       True if val1 and val2 are effectively the same, False otherwise.
 393     """
 394     # An empty string sent to spreadsheet comes back as None.  In any case,
 395     # treat two false equivalents as equal.
 396     if not (val1 or val2):
 397       return True
 398
 399     # If only one of the values is set to anything then they are not the same.
 400     if bool(val1) != bool(val2):
 401       return False
 402
 403     # If values are equal then we are done.
 404     if val1 == val2:
 405       return True
 406
 407     # Ignore case differences.  This is because, for example, the
 408     # spreadsheet automatically changes "True" to "TRUE".
 409     if val1 and val2 and val1.lower() == val2.lower():
 410       return True
 411
 412     # If either value is a HYPERLINK, then extract just the text for comparison
 413     # because that is all the spreadsheet says the value is.
 414     match = cls.HYPERLINK_RE.search(val1)
 415     if match:
 416       return match.group(1) == val2
 417     match = cls.HYPERLINK_RE.search(val2)
 418     if match:
 419       return match.group(2) == val1
 420
 421     # See if the strings are two different representations of the same datetime.
 422     dt1, dt2 = None, None
 423     for dt_format in cls.DATETIME_FORMATS:
 424       try:
 425         dt1 = datetime.datetime.strptime(val1, dt_format)
 426       except ValueError:
 427         pass
 428       try:
 429         dt2 = datetime.datetime.strptime(val2, dt_format)
 430       except ValueError:
 431         pass
 432     if dt1 and dt2 and dt1 == dt2:
 433       return True
 434
 435     # If we get this far then the values are just different.
 436     return False
 437
 438   def _Connect(self, ws_name):
 439     """Establish connection to specific worksheet.
 440
 441     Args:
 442       ws_name: Worksheet name.
 443     """
 444     if self._scomm:
 445       self._scomm.SetCurrentWorksheet(ws_name)
 446     else:
 447       self._scomm = gdata_lib.SpreadsheetComm()
 448       self._scomm.Connect(self._creds, self.ss_key, ws_name, source=self.SOURCE)
 449
 450   def GetRowCacheByCol(self, ws_name, key):
 451     """Fetch the row cache with id=|key|."""
 452     self._Connect(ws_name)
 453     ss_key = gdata_lib.PrepColNameForSS(key)
 454     return self._scomm.GetRowCacheByCol(ss_key)
 455
 456   def _EnsureColumnsExist(self, data_columns):
 457     """Ensures that |data_columns| exist in current spreadsheet worksheet.
 458
 459     Assumes spreadsheet worksheet is already connected.
 460
 461     Raises:
 462       SpreadsheetError if any column in |data_columns| is missing from
 463       the spreadsheet's current worksheet.
 464     """
 465     ss_cols = self._scomm.GetColumns()
 466
 467     # Make sure all columns in data_table are supported in spreadsheet.
 468     missing_cols = [c for c in data_columns
 469                     if gdata_lib.PrepColNameForSS(c) not in ss_cols]
 470     if missing_cols:
 471       raise SpreadsheetError('Spreadsheet missing column(s): %s' %
 472                              ', '.join(missing_cols))
 473
 474   def UploadColumnToWorksheet(self, ws_name, colIx, data):
 475     """Upload list |data| to column number |colIx| in worksheet |ws_name|.
 476
 477     This will overwrite any existing data in that column.
 478     """
 479     self._Connect(ws_name)
 480     self._scomm.WriteColumnToWorksheet(colIx, data)
 481
 482   def UploadSequentialRows(self, ws_name, data_table):
 483     """Upload |data_table| to the |ws_name| worksheet of sheet at self.ss_key.
 484
 485     Data will be uploaded row-by-row in ascending ID_COL order. Missing
 486     values of ID_COL will be filled in by filler rows.
 487
 488     Args:
 489       ws_name: Worksheet name for identifying worksheet within spreadsheet.
 490       data_table: table.Table object with rows to upload to worksheet.
 491     """
 492     self._Connect(ws_name)
 493     cros_build_lib.Info('Uploading stats rows to worksheet "%s" of spreadsheet'
 494                         ' "%s" now.', self._scomm.ws_name, self._scomm.ss_key)
 495
 496     cros_build_lib.Debug('Getting cache of current spreadsheet contents.')
 497     id_col = data_table.ID_COL
 498     ss_id_col = gdata_lib.PrepColNameForSS(id_col)
 499     ss_row_cache = self._scomm.GetRowCacheByCol(ss_id_col)
 500
 501     self._EnsureColumnsExist(data_table.GetColumns())
 502
 503     # First see if a build_number is being skipped.  Allow the data_table to
 504     # add default (filler) rows if it wants to.  These rows may represent
 505     # aborted runs, for example.  ID_COL is assumed to hold integers.
 506     first_id_val = int(data_table[-1][id_col])
 507     prev_id_val = first_id_val - 1
 508     while str(prev_id_val) not in ss_row_cache:
 509       data_table.AppendGapRow(prev_id_val)
 510       prev_id_val -= 1
 511       # Sanity check that we have not created an infinite loop.
 512       assert prev_id_val >= 0
 513
 514     # Spreadsheet is organized with oldest runs at the top and newest runs
 515     # at the bottom (because there is no interface for inserting new rows at
 516     # the top).  This is the reverse of data_table, so start at the end.
 517     for row in data_table[::-1]:
 518       row_dict = dict((gdata_lib.PrepColNameForSS(key), row[key])
 519                       for key in row)
 520
 521       # See if row with the same id_col value already exists.
 522       id_val = row[id_col]
 523       ss_row = ss_row_cache.get(id_val)
 524
 525       try:
 526         if ss_row:
 527           # Row already exists in spreadsheet.  See if contents any different.
 528           # Create dict representing values in row_dict different from ss_row.
 529           row_delta = dict((k, v) for k, v in row_dict.iteritems()
 530                            if not self._ValsEqual(v, ss_row[k]))
 531           if row_delta:
 532             cros_build_lib.Debug('Updating existing spreadsheet row for %s %s.',
 533                                  id_col, id_val)
 534             self._scomm.UpdateRowCellByCell(ss_row.ss_row_num, row_delta)
 535           else:
 536             cros_build_lib.Debug('Unchanged existing spreadsheet row for'
 537                                  ' %s %s.', id_col, id_val)
 538         else:
 539           cros_build_lib.Debug('Adding spreadsheet row for %s %s.',
 540                                id_col, id_val)
 541           self._scomm.InsertRow(row_dict)
 542       except gdata_lib.SpreadsheetError as e:
 543         cros_build_lib.Error('Failure while uploading spreadsheet row for'
 544                              ' %s %s with data %s. Error: %s.', id_col, id_val,
 545                              row_dict, e)
 546
 547
 548 class StatsManager(object):
 549   """Abstract class for managing stats for one config target.
 550
 551   Subclasses should specify the config target by passing them in to __init__.
 552
 553   This class handles the following duties:
 554     1) Read a bunch of metadata.json URLs for the config target that are
 555        are no older than the given start date.
 556     2) Upload data to a Google Sheet, if specified by the subclass.
 557     3) Upload data to Graphite, if specified by the subclass.
 558   """
 559   # Subclasses can overwrite any of these.
 560   TABLE_CLASS = None
 561   UPLOAD_ROW_PER_BUILD = False
 562   # To be overridden by subclass. A dictionary mapping a |key| from
 563   # self.summary to (ws_name, colIx) tuples from the spreadsheet which
 564   # should be overwritten with the data from the self.summary[key]
 565   SUMMARY_SPREADSHEET_COLUMNS = {}
 566   CARBON_FUNCS_BY_VERSION = None
 567   BOT_TYPE = None
 568
 569   # Whether to grab a count of what data has been written to sheets before.
 570   # This is needed if you are writing data to the Google Sheets spreadsheet.
 571   GET_SHEETS_VERSION = True
 572
 573   def __init__(self, config_target, ss_key=None,
 574                no_sheets_version_filter=False):
 575     self.builds = []
 576     self.gs_ctx = gs.GSContext()
 577     self.config_target = config_target
 578     self.ss_key = ss_key
 579     self.no_sheets_version_filter = no_sheets_version_filter
 580     self.summary = {}
 581
 582
 583   #pylint: disable-msg=W0613
 584   def Gather(self, start_date, sort_by_build_number=True,
 585              starting_build_number=0, creds=None):
 586     """Fetches build data into self.builds.
 587
 588     Args:
 589       start_date: A datetime.date instance for the earliest build to
 590                   examine.
 591       sort_by_build_number: Optional boolean. If True, builds will be
 592                             sorted by build number.
 593       starting_build_number: The lowest build number to include in
 594                              self.builds.
 595       creds: Login credentials as returned by _PrepareCreds. (optional)
 596     """
 597     self.builds = self._FetchBuildData(start_date, self.config_target,
 598                                        self.gs_ctx)
 599
 600     if sort_by_build_number:
 601       # Sort runs by build_number, from newest to oldest.
 602       cros_build_lib.Info('Sorting by build number now.')
 603       self.builds = sorted(self.builds, key=lambda b: b.build_number,
 604                            reverse=True)
 605     if starting_build_number:
 606       cros_build_lib.Info('Filtering to include builds after %s (inclusive).',
 607                           starting_build_number)
 608       self.builds = filter(lambda b: b.build_number >= starting_build_number,
 609                            self.builds)
 610
 611   def CollectActions(self):
 612     """Collects the CL actions from the set of gathered builds.
 613
 614     Returns a list of CLActionWithBuildTuple for all the actions in the
 615     gathered builds.
 616     """
 617     actions = []
 618     for b in self.builds:
 619       if not 'cl_actions' in b.metadata_dict:
 620         logging.warn('No cl_actions for metadata at %s.', b.metadata_url)
 621         continue
 622       for a in b.metadata_dict['cl_actions']:
 623         actions.append(metadata_lib.CLActionWithBuildTuple(*a,
 624             bot_type=self.BOT_TYPE, build=b))
 625
 626     return actions
 627
 628   def CollateActions(self, actions):
 629     """Collates a list of actions into per-patch and per-cl actions.
 630
 631     Returns a tuple (per_patch_actions, per_cl_actions) where each are
 632     a dictionary mapping patches or cls to a list of CLActionWithBuildTuple
 633     sorted in order of ascending timestamp.
 634     """
 635     per_patch_actions = {}
 636     per_cl_actions = {}
 637     for a in actions:
 638       change_dict = a.change.copy()
 639       change_with_patch = metadata_lib.GerritPatchTuple(**change_dict)
 640       change_dict.pop('patch_number')
 641       change_no_patch = metadata_lib.GerritChangeTuple(**change_dict)
 642
 643       per_patch_actions.setdefault(change_with_patch, []).append(a)
 644       per_cl_actions.setdefault(change_no_patch, []).append(a)
 645
 646     for p in [per_cl_actions, per_patch_actions]:
 647       for k, v in p.iteritems():
 648         p[k] = sorted(v, key=lambda x: x.timestamp)
 649
 650     return (per_patch_actions, per_cl_actions)
 651
 652   @classmethod
 653   def _FetchBuildData(cls, start_date, config_target, gs_ctx):
 654     """Fetches BuildData for builds of |config_target| since |start_date|.
 655
 656     Args:
 657       start_date: A datetime.date instance.
 658       config_target: String config name to fetch metadata for.
 659       gs_ctx: A gs.GSContext instance.
 660
 661     Returns:
 662       A list of of metadata_lib.BuildData objects that were fetched.
 663     """
 664     cros_build_lib.Info('Gathering data for %s since %s', config_target,
 665                         start_date)
 666     urls = metadata_lib.GetMetadataURLsSince(config_target,
 667                                                    start_date)
 668     cros_build_lib.Info('Found %d metadata.json URLs to process.\n'
 669                         '  From: %s\n  To  : %s', len(urls), urls[0], urls[-1])
 670
 671     builds = metadata_lib.BuildData.ReadMetadataURLs(
 672         urls, gs_ctx, get_sheets_version=cls.GET_SHEETS_VERSION)
 673     cros_build_lib.Info('Read %d total metadata files.', len(builds))
 674     return builds
 675
 676   # TODO(akeshet): Return statistics in dictionary rather than just printing
 677   # them.
 678   def Summarize(self):
 679     """Process and print a summary of statistics.
 680
 681     Returns:
 682       An empty dictionary. Note: subclasses can extend this method and return
 683       non-empty dictionaries, with summarized statistics.
 684     """
 685     if self.builds:
 686       cros_build_lib.Info('%d total runs included, from build %d to %d.',
 687                           len(self.builds), self.builds[-1].build_number,
 688                           self.builds[0].build_number)
 689       total_passed = len([b for b in self.builds if b.Passed()])
 690       cros_build_lib.Info('%d of %d runs passed.', total_passed,
 691                           len(self.builds))
 692     else:
 693       cros_build_lib.Info('No runs included.')
 694     return {}
 695
 696   @property
 697   def sheets_version(self):
 698     if self.TABLE_CLASS:
 699       return self.TABLE_CLASS.SHEETS_VERSION
 700
 701     return -1
 702
 703   @property
 704   def carbon_version(self):
 705     if self.CARBON_FUNCS_BY_VERSION:
 706       return len(self.CARBON_FUNCS_BY_VERSION) - 1
 707
 708     return -1
 709
 710   def UploadToSheet(self, creds):
 711     assert creds
 712
 713     if self.UPLOAD_ROW_PER_BUILD:
 714       self._UploadBuildsToSheet(creds)
 715
 716     if self.SUMMARY_SPREADSHEET_COLUMNS:
 717       self._UploadSummaryColumns(creds)
 718
 719   def _UploadBuildsToSheet(self, creds):
 720     """Upload row-per-build data to adsheet."""
 721     if not self.TABLE_CLASS:
 722       cros_build_lib.Debug('No Spreadsheet uploading configured for %s.',
 723                            self.config_target)
 724       return
 725
 726     # Filter for builds that need to send data to Sheets (unless overridden
 727     # by command line flag.
 728     if self.no_sheets_version_filter:
 729       builds = self.builds
 730     else:
 731       version = self.sheets_version
 732       builds = [b for b in self.builds if b.sheets_version < version]
 733       cros_build_lib.Info('Found %d builds that need to send Sheets v%d data.',
 734                           len(builds), version)
 735
 736     if builds:
 737       # Fill a data table of type table_class from self.builds.
 738       # pylint: disable=E1102
 739       data_table = self.TABLE_CLASS()
 740       for build in builds:
 741         try:
 742           data_table.AppendBuildRow(build)
 743         except Exception:
 744           cros_build_lib.Error('Failed to add row for builder_number %s to'
 745                                ' table.  It came from %s.', build.build_number,
 746                                build.metadata_url)
 747           raise
 748
 749       # Upload data table to sheet.
 750       uploader = SSUploader(creds, self.ss_key)
 751       uploader.UploadSequentialRows(data_table.WORKSHEET_NAME, data_table)
 752
 753   def _UploadSummaryColumns(self, creds):
 754     """Overwrite summary columns in spreadsheet with appropriate data."""
 755     # Upload data table to sheet.
 756     uploader = SSUploader(creds, self.ss_key)
 757     for key, (ws_name, colIx) in self.SUMMARY_SPREADSHEET_COLUMNS.iteritems():
 758       uploader.UploadColumnToWorksheet(ws_name, colIx, self.summary[key])
 759
 760
 761   def SendToCarbon(self):
 762     if self.CARBON_FUNCS_BY_VERSION:
 763       for version, func in enumerate(self.CARBON_FUNCS_BY_VERSION):
 764         # Filter for builds that need to send data to Graphite.
 765         builds = [b for b in self.builds if b.carbon_version < version]
 766         cros_build_lib.Info('Found %d builds that need to send Graphite v%d'
 767                             ' data.', len(builds), version)
 768         if builds:
 769           func(self, builds)
 770
 771   def MarkGathered(self):
 772     """Mark each metadata.json in self.builds as processed.
 773
 774     Applies only to StatsManager subclasses that have UPLOAD_ROW_PER_BUILD
 775     True, as these do not want data from a given build to be re-uploaded.
 776     """
 777     if self.UPLOAD_ROW_PER_BUILD:
 778       metadata_lib.BuildData.MarkBuildsGathered(self.builds,
 779                                                 self.sheets_version,
 780                                                 self.carbon_version,
 781                                                 gs_ctx=self.gs_ctx)
 782
 783
 784 # TODO(mtennant): This class is an untested placeholder.
 785 class CQSlaveStats(StatsManager):
 786   """Stats manager for all CQ slaves."""
 787   # TODO(mtennant): Add Sheets support for each CQ slave.
 788   TABLE_CLASS = None
 789   GET_SHEETS_VERSION = True
 790
 791   def __init__(self, slave_target, **kwargs):
 792     super(CQSlaveStats, self).__init__(slave_target, kwargs)
 793
 794   # TODO(mtennant): This is totally untested, but is a refactoring of the
 795   # graphite code that was in place before for CQ slaves.
 796   def _SendToCarbonV0(self, builds):
 797     # Send runtime data.
 798     def _GetGraphName(build):
 799       bot_id = build['bot-config'].replace('-', '.')
 800       return 'buildbot.builders.%s.duration_seconds' % bot_id
 801
 802     _SendToCarbon(builds, (
 803         _GetGraphName,
 804         lambda b : b.runtime_seconds,
 805         lambda b : b.epoch_time_seconds,
 806     ))
 807
 808
 809 class CQMasterStats(StatsManager):
 810   """Manager stats gathering for the Commit Queue Master."""
 811   TABLE_CLASS = CQMasterTable
 812   UPLOAD_ROW_PER_BUILD = True
 813   BOT_TYPE = CQ
 814   GET_SHEETS_VERSION = True
 815
 816   def __init__(self, **kwargs):
 817     super(CQMasterStats, self).__init__(CQ_MASTER, **kwargs)
 818
 819   def _SendToCarbonV0(self, builds):
 820     # Send runtime data.
 821     _SendToCarbon(builds, (
 822         lambda b : 'buildbot.cq.run_time_seconds',
 823         lambda b : b.runtime_seconds,
 824         lambda b : b.epoch_time_seconds,
 825     ))
 826
 827     # Send CLs per run data.
 828     _SendToCarbon(builds, (
 829         lambda b : 'buildbot.cq.cls_per_run',
 830         lambda b : b.count_changes,
 831         lambda b : b.epoch_time_seconds,
 832     ))
 833
 834   # Organized by by increasing graphite version numbers, starting at 0.
 835   CARBON_FUNCS_BY_VERSION = (
 836       _SendToCarbonV0,
 837   )
 838
 839
 840 class PFQMasterStats(StatsManager):
 841   """Manager stats gathering for the PFQ Master."""
 842   TABLE_CLASS = PFQMasterTable
 843   UPLOAD_ROW_PER_BUILD = True
 844   BOT_TYPE = PFQ
 845   GET_SHEETS_VERSION = True
 846
 847   def __init__(self, **kwargs):
 848     super(PFQMasterStats, self).__init__(PFQ_MASTER, **kwargs)
 849
 850
 851 # TODO(mtennant): Add Sheets support for PreCQ by creating a PreCQTable
 852 # class modeled after CQMasterTable and then adding it as TABLE_CLASS here.
 853 # TODO(mtennant): Add Graphite support for PreCQ by CARBON_FUNCS_BY_VERSION
 854 # in this class.
 855 class PreCQStats(StatsManager):
 856   """Manager stats gathering for the Pre Commit Queue."""
 857   TABLE_CLASS = None
 858   BOT_TYPE = PRE_CQ
 859   GET_SHEETS_VERSION = False
 860
 861   def __init__(self, **kwargs):
 862     super(PreCQStats, self).__init__(PRE_CQ_GROUP_GS_LOCATION, **kwargs)
 863
 864
 865 class CLStats(StatsManager):
 866   """Manager for stats about CL actions taken by the Commit Queue."""
 867   PATCH_HANDLING_TIME_SUMMARY_KEY = 'patch_handling_time'
 868   SUMMARY_SPREADSHEET_COLUMNS = {
 869       PATCH_HANDLING_TIME_SUMMARY_KEY : ('PatchHistogram', 1)}
 870   COL_FAILURE_CATEGORY = 'failure category'
 871   COL_FAILURE_BLAME = 'bug or bad CL'
 872   REASON_BAD_CL = 'Bad CL'
 873   BOT_TYPE = CQ
 874   GET_SHEETS_VERSION = False
 875
 876   def __init__(self, email, **kwargs):
 877     super(CLStats, self).__init__(CQ_MASTER, **kwargs)
 878     self.actions = []
 879     self.per_patch_actions = {}
 880     self.per_cl_actions = {}
 881     self.email = email
 882     self.reasons = {}
 883     self.blames = {}
 884     self.summary = {}
 885     self.pre_cq_stats = PreCQStats()
 886
 887   def GatherFailureReasons(self, creds):
 888     """Gather the reasons why our builds failed and the blamed bugs or CLs.
 889
 890     Args:
 891       creds: A gdata_lib.Creds object.
 892     """
 893     data_table = CQMasterStats.TABLE_CLASS()
 894     uploader = SSUploader(creds, self.ss_key)
 895     ss_failure_category = gdata_lib.PrepColNameForSS(self.COL_FAILURE_CATEGORY)
 896     ss_failure_blame = gdata_lib.PrepColNameForSS(self.COL_FAILURE_BLAME)
 897     rows = uploader.GetRowCacheByCol(data_table.WORKSHEET_NAME,
 898                                      data_table.COL_BUILD_NUMBER)
 899     for b in self.builds:
 900       try:
 901         row = rows[str(b.build_number)]
 902       except KeyError:
 903         self.reasons[b.build_number] = 'None'
 904         self.blames[b.build_number] = []
 905       else:
 906         self.reasons[b.build_number] = str(row[ss_failure_category])
 907         self.blames[b.build_number] = self.ProcessBlameString(
 908             str(row[ss_failure_blame]))
 909
 910   @staticmethod
 911   def ProcessBlameString(blame_string):
 912     """Parse a human-created |blame_string| from the spreadsheet.
 913
 914     Returns:
 915       A list of canonicalized URLs for bugs or CLs that appear in the blame
 916       string. Canonicalized form will be 'crbug.com/1234',
 917       'crosreview.com/1234', 'b/1234', 't/1234', or 'crosreview.com/i/1234' as
 918       applicable.
 919     """
 920     urls = []
 921     tokens = blame_string.split()
 922
 923     # Format to generate the regex patterns. Matches one of provided domain
 924     # names, followed by lazy wildcard, followed by greedy digit wildcard,
 925     # followed by optional slash and optional comma.
 926     general_regex = r'^.*(%s).*?([0-9]+)/?,?$'
 927
 928     crbug = general_regex % 'crbug.com|code.google.com'
 929     internal_review = (general_regex %
 930         'chrome-internal-review.googlesource.com|crosreview.com/i')
 931     external_review = (general_regex %
 932         'crosreview.com|chromium-review.googlesource.com')
 933     guts = (general_regex % 't/|gutsv\d.corp.google.com/#ticket/')
 934
 935     # Buganizer regex is different, as buganizer urls do not end with the bug
 936     # number.
 937     buganizer = r'^.*(b/|b.corp.google.com/issue\?id=)([0-9]+).*$'
 938
 939     # Patterns need to be tried in a specific order -- internal review needs
 940     # to be tried before external review, otherwise urls like crosreview.com/i
 941     # will be incorrectly parsed as external.
 942     patterns = [crbug,
 943                 internal_review,
 944                 external_review,
 945                 buganizer,
 946                 guts]
 947     url_patterns = [CROS_BUG_BASE_URL,
 948                     INTERNAL_CL_BASE_URL,
 949                     EXTERNAL_CL_BASE_URL,
 950                     BUGANIZER_BASE_URL,
 951                     GUTS_BASE_URL]
 952
 953     for t in tokens:
 954       for p, u in zip(patterns, url_patterns):
 955         m = re.match(p, t)
 956         if m:
 957           urls.append(u + m.group(2))
 958           break
 959
 960     return urls
 961
 962   def Gather(self, start_date, sort_by_build_number=True,
 963              starting_build_number=0, creds=None):
 964     """Fetches build data and failure reasons.
 965
 966     Args:
 967       start_date: A datetime.date instance for the earliest build to
 968                   examine.
 969       sort_by_build_number: Optional boolean. If True, builds will be
 970                             sorted by build number.
 971       starting_build_number: The lowest build number from the CQ to include in
 972                              the results.
 973       creds: Login credentials as returned by _PrepareCreds. (optional)
 974     """
 975     if not creds:
 976       creds = _PrepareCreds(self.email)
 977     super(CLStats, self).Gather(start_date,
 978                                 sort_by_build_number=sort_by_build_number,
 979                                 starting_build_number=starting_build_number)
 980     self.GatherFailureReasons(creds)
 981
 982     # Gather the pre-cq stats as well. The build number won't apply here since
 983     # the pre-cq has different build numbers. We intentionally represent the
 984     # Pre-CQ stats in a different object to help keep things simple.
 985     self.pre_cq_stats.Gather(start_date,
 986                              sort_by_build_number=sort_by_build_number)
 987
 988   def GetSubmittedPatchNumber(self, actions):
 989     """Get the patch number of the final patchset submitted.
 990
 991     This function only makes sense for patches that were submitted.
 992
 993     Args:
 994       actions: A list of actions for a single change.
 995     """
 996     submit = [a for a in actions if a.action == constants.CL_ACTION_SUBMITTED]
 997     assert len(submit) == 1, \
 998         'Expected change to be submitted exactly once, got %r' % submit
 999     return submit[-1].change['patch_number']
1000
1001   def ClassifyRejections(self, submitted_changes):
1002     """Categorize rejected CLs, deciding whether the rejection was incorrect.
1003
1004     We figure out what patches were falsely rejected by looking for patches
1005     which were later submitted unmodified after being rejected. These patches
1006     are considered to be likely good CLs.
1007
1008     Args:
1009       submitted_changes: A dict mapping submitted GerritChangeTuple objects to
1010         a list of associated actions.
1011
1012     Yields:
1013       change: The GerritChangeTuple that was rejected.
1014       actions: A list of actions applicable to the CL.
1015       a: The reject action that kicked out the CL.
1016       falsely_rejected: Whether the CL was incorrectly rejected. A CL rejection
1017         is considered incorrect if the same patch is later submitted, with no
1018         changes. It's a heuristic.
1019     """
1020     for change, actions in submitted_changes.iteritems():
1021       submitted_patch_number = self.GetSubmittedPatchNumber(actions)
1022       for a in actions:
1023         # If the patch wasn't included in the run, this means that it "failed
1024         # to apply" rather than "failed to validate". Ignore it.
1025         patch = metadata_lib.GerritPatchTuple(**a.change)
1026         if (a.action == constants.CL_ACTION_KICKED_OUT and
1027             patch in a.build.patches):
1028           # Check whether the patch was updated after submission.
1029           falsely_rejected = a.change['patch_number'] == submitted_patch_number
1030           yield change, actions, a, falsely_rejected
1031
1032   def _PrintCounts(self, reasons, fmt):
1033     """Print a sorted list of reasons in descending order of frequency.
1034
1035     Args:
1036       reasons: A key/value mapping mapping the reason to the count.
1037       fmt: A format string for our log message, containing %(cnt)d
1038         and %(reason)s.
1039     """
1040     d = reasons
1041     for cnt, reason in sorted(((v, k) for (k, v) in d.items()), reverse=True):
1042       logging.info(fmt, dict(cnt=cnt, reason=reason))
1043     if not d:
1044       logging.info('  None')
1045
1046   def CalculateStageFailures(self, reject_actions, submitted_changes,
1047                              good_patch_rejections):
1048     """Calculate what stages correctly or incorrectly failed.
1049
1050     Args:
1051       reject_actions: A list of actions that reject CLs.
1052       submitted_changes: A dict mapping submitted GerritChangeTuple to a list
1053         of associated actions.
1054       good_patch_rejections: A dict mapping submitted GerritPatchTuple to a list
1055         of associated incorrect rejections.
1056
1057     Returns:
1058       correctly_rejected_by_stage: A dict, where dict[bot_type][stage_name]
1059         counts the number of times a probably bad patch was rejected due to a
1060         failure in this stage.
1061       incorrectly_rejected_by_stage: A dict, where dict[bot_type][stage_name]
1062         counts the number of times a probably good patch was rejected due to a
1063         failure in this stage.
1064     """
1065     # Keep track of a list of builds that were manually annotated as bad CL.
1066     # These are used to ensure that we don't treat real failures as being flaky.
1067     bad_cl_builds = set()
1068     for a in reject_actions:
1069       if a.bot_type == CQ:
1070         reason = self.reasons.get(a.build.build_number)
1071         if reason == self.REASON_BAD_CL:
1072           bad_cl_builds.add((a.build.bot_id, a.build.build_number))
1073
1074     # Keep track of the stages that correctly detected a bad CL. We assume
1075     # here that all of the stages that are broken were broken by the bad CL.
1076     correctly_rejected_by_stage = {}
1077     for _, _, a, falsely_rejected in self.ClassifyRejections(submitted_changes):
1078       if not falsely_rejected:
1079         good = correctly_rejected_by_stage.setdefault(a.bot_type, {})
1080         for stage_name in a.build.GetFailedStages():
1081           good[stage_name] = good.get(stage_name, 0) + 1
1082
1083     # Keep track of the stages that failed flakily.
1084     incorrectly_rejected_by_stage = {}
1085     for rejections in good_patch_rejections.values():
1086       for a in rejections:
1087         # A stage only failed flakily if it wasn't broken by another CL.
1088         build_tuple = (a.build.bot_id, a.build.build_number)
1089         if build_tuple not in bad_cl_builds:
1090           bad = incorrectly_rejected_by_stage.setdefault(a.bot_type, {})
1091           for stage_name in a.build.GetFailedStages():
1092             bad[stage_name] = bad.get(stage_name, 0) + 1
1093
1094     return correctly_rejected_by_stage, incorrectly_rejected_by_stage
1095
1096   def GoodPatchRejections(self, submitted_changes):
1097     """Find good patches that were incorrectly rejected.
1098
1099     Args:
1100       submitted_changes: A dict mapping submitted GerritChangeTuple objects to
1101         a list of associated actions.
1102
1103     Returns:
1104       A dict, where d[patch] = reject_actions for each good patch that was
1105       incorrectly rejected.
1106     """
1107     # falsely_rejected_changes maps GerritChangeTuple objects to their actions.
1108     # bad_cl_builds is a set of builds that contain a bad patch.
1109     falsely_rejected_changes = {}
1110     bad_cl_builds = set()
1111     for x in self.ClassifyRejections(submitted_changes):
1112       change, actions, a, falsely_rejected = x
1113       patch = metadata_lib.GerritPatchTuple(**a.change)
1114       if falsely_rejected:
1115         falsely_rejected_changes[change] = actions
1116       elif a.bot_type == PRE_CQ:
1117         # If a developer writes a bad patch and it fails the Pre-CQ, it
1118         # may cause many other patches from the same developer to be
1119         # rejected. This is expected and correct behavior. Treat all of
1120         # the patches in the Pre-CQ run as bad so that they don't skew our
1121         # our statistics.
1122         #
1123         # Since we don't have a spreadsheet for the Pre-CQ, we guess what
1124         # CLs were bad by looking at what patches needed to be changed
1125         # before submission.
1126         #
1127         # NOTE: We intentionally only apply this logic to the Pre-CQ here.
1128         # The CQ is different because it may have many innocent patches in
1129         # a single run which should not be treated as bad.
1130         bad_cl_builds.add((a.build.bot_id, a.build.build_number))
1131
1132     # Make a list of candidate patches that got incorrectly rejected. We track
1133     # them in a dict, setting good_patch_rejections[patch] = rejections for
1134     # each patch.
1135     good_patch_rejections = collections.defaultdict(list)
1136     for v in falsely_rejected_changes.itervalues():
1137       for a in v:
1138         if (a.action == constants.CL_ACTION_KICKED_OUT and
1139             (a.build.bot_id, a.build.build_number) not in bad_cl_builds):
1140           patch = metadata_lib.GerritPatchTuple(**a.change)
1141           good_patch_rejections[patch].append(a)
1142
1143     return good_patch_rejections
1144
1145   def FalseRejectionRate(self, good_patch_count, good_patch_rejection_count):
1146     """Calculate the false rejection ratio.
1147
1148     This is the chance that a good patch will be rejected by the Pre-CQ or CQ
1149     in a given run.
1150
1151     Args:
1152       good_patch_count: The number of good patches in the run.
1153       good_patch_rejection_count: A dict containing the number of false
1154         rejections for the CQ and PRE_CQ.
1155
1156     Returns:
1157       A dict containing the false rejection ratios for CQ, PRE_CQ, and combined.
1158     """
1159     false_rejection_rate = dict()
1160     for bot, rejection_count in good_patch_rejection_count.iteritems():
1161       false_rejection_rate[bot] = (
1162           rejection_count * 100 / (rejection_count + good_patch_count)
1163       )
1164     false_rejection_rate['combined'] = 0
1165     if good_patch_count:
1166       rejection_count = sum(good_patch_rejection_count.values())
1167       false_rejection_rate['combined'] = (
1168           rejection_count * 100 / (good_patch_count + rejection_count)
1169       )
1170     return false_rejection_rate
1171
1172   def Summarize(self):
1173     """Process, print, and return a summary of cl action statistics.
1174
1175     As a side effect, save summary to self.summary.
1176
1177     Returns:
1178       A dictionary summarizing the statistics.
1179     """
1180     super_summary = super(CLStats, self).Summarize()
1181
1182     self.actions = (self.CollectActions() +
1183                     self.pre_cq_stats.CollectActions())
1184
1185     (self.per_patch_actions,
1186      self.per_cl_actions) = self.CollateActions(self.actions)
1187
1188     submit_actions = [a for a in self.actions
1189                       if a.action == constants.CL_ACTION_SUBMITTED]
1190     reject_actions = [a for a in self.actions
1191                       if a.action == constants.CL_ACTION_KICKED_OUT]
1192     sbfail_actions = [a for a in self.actions
1193                       if a.action == constants.CL_ACTION_SUBMIT_FAILED]
1194
1195     build_reason_counts = {}
1196     for reason in self.reasons.values():
1197       if reason != 'None':
1198         build_reason_counts[reason] = build_reason_counts.get(reason, 0) + 1
1199
1200     unique_blames = set()
1201     for blames in self.blames.itervalues():
1202       unique_blames.update(blames)
1203
1204     unique_cl_blames = {blame for blame in unique_blames if
1205                         EXTERNAL_CL_BASE_URL in blame}
1206
1207     submitted_changes = {k : v for k, v, in self.per_cl_actions.iteritems()
1208                          if any(a.action==constants.CL_ACTION_SUBMITTED
1209                                 for a in v)}
1210     submitted_patches = {k : v for k, v, in self.per_patch_actions.iteritems()
1211                          if any(a.action==constants.CL_ACTION_SUBMITTED
1212                                 for a in v)}
1213
1214     patch_handle_times =  [v[-1].timestamp - v[0].timestamp
1215                            for v in submitted_patches.values()]
1216
1217     # Count CLs that were rejected, then a subsequent patch was submitted.
1218     # These are good candidates for bad CLs. We track them in a dict, setting
1219     # submitted_after_new_patch[bot_type][patch] = actions for each bad patch.
1220     submitted_after_new_patch = {}
1221     for x in self.ClassifyRejections(submitted_changes):
1222       change, actions, a, falsely_rejected = x
1223       if not falsely_rejected:
1224         d = submitted_after_new_patch.setdefault(a.bot_type, {})
1225         d[change] = actions
1226
1227     # Sort the candidate bad CLs in order of submit time.
1228     bad_cl_candidates = {}
1229     for bot_type, patch_actions in submitted_after_new_patch.items():
1230       bad_cl_candidates[bot_type] = [
1231         k for k, _ in sorted(patch_actions.items(),
1232                              key=lambda x: x[1][-1].timestamp)]
1233
1234     # Calculate how many good patches were falsely rejected and why.
1235     # good_patch_rejections maps patches to the rejection actions.
1236     # patch_reason_counts maps failure reasons to counts.
1237     # patch_blame_counts maps blame targets to counts.
1238     good_patch_rejections = self.GoodPatchRejections(submitted_changes)
1239     patch_reason_counts = {}
1240     patch_blame_counts = {}
1241     for k, v in good_patch_rejections.iteritems():
1242       for a in v:
1243         if a.action == constants.CL_ACTION_KICKED_OUT:
1244           if a.bot_type == CQ:
1245             reason = self.reasons[a.build.build_number]
1246             blames = self.blames[a.build.build_number]
1247             patch_reason_counts[reason] = patch_reason_counts.get(reason, 0) + 1
1248             for blame in blames:
1249               patch_blame_counts[blame] = patch_blame_counts.get(blame, 0) + 1
1250
1251     # good_patch_count: The number of good patches.
1252     # good_patch_rejection_count maps the bot type (CQ or PRE_CQ) to the number
1253     #   of times that bot has falsely rejected good patches.
1254     good_patch_count = len(submit_actions)
1255     good_patch_rejection_count = collections.defaultdict(int)
1256     for k, v in good_patch_rejections.iteritems():
1257       for a in v:
1258         good_patch_rejection_count[a.bot_type] += 1
1259     false_rejection_rate = self.FalseRejectionRate(good_patch_count,
1260                                                    good_patch_rejection_count)
1261
1262     # This list counts how many times each good patch was rejected.
1263     rejection_counts = [0] * (good_patch_count - len(good_patch_rejections))
1264     rejection_counts += [len(x) for x in good_patch_rejections.values()]
1265
1266     # Break down the frequency of how many times each patch is rejected.
1267     good_patch_rejection_breakdown = []
1268     if rejection_counts:
1269       for x in range(max(rejection_counts) + 1):
1270         good_patch_rejection_breakdown.append((x, rejection_counts.count(x)))
1271
1272     correctly_rejected_by_stage, incorrectly_rejected_by_stage = \
1273         self.CalculateStageFailures(reject_actions, submitted_changes,
1274                                     good_patch_rejections)
1275
1276     summary = {'total_cl_actions'      : len(self.actions),
1277                'unique_cls'            : len(self.per_cl_actions),
1278                'unique_patches'        : len(self.per_patch_actions),
1279                'submitted_patches'     : len(submit_actions),
1280                'rejections'            : len(reject_actions),
1281                'submit_fails'          : len(sbfail_actions),
1282                'good_patch_rejections' : sum(rejection_counts),
1283                'mean_good_patch_rejections' :
1284                    numpy.mean(rejection_counts),
1285                'good_patch_rejection_breakdown' :
1286                    good_patch_rejection_breakdown,
1287                'good_patch_rejection_count' :
1288                    dict(good_patch_rejection_count),
1289                'false_rejection_rate' :
1290                    false_rejection_rate,
1291                'median_handling_time' : numpy.median(patch_handle_times),
1292                self.PATCH_HANDLING_TIME_SUMMARY_KEY : patch_handle_times,
1293                'bad_cl_candidates' : bad_cl_candidates,
1294                'correctly_rejected_by_stage' : correctly_rejected_by_stage,
1295                'incorrectly_rejected_by_stage' : incorrectly_rejected_by_stage,
1296                'unique_blames_change_count' : len(unique_cl_blames),
1297                }
1298
1299     logging.info('CQ committed %s changes', summary['submitted_patches'])
1300     logging.info('CQ correctly rejected %s unique changes',
1301                  summary['unique_blames_change_count'])
1302     logging.info('pre-CQ and CQ incorrectly rejected %s changes a total of '
1303                  '%s times (pre-CQ: %s; CQ: %s)',
1304                  len(good_patch_rejections),
1305                  sum(good_patch_rejection_count.values()),
1306                  good_patch_rejection_count[PRE_CQ],
1307                  good_patch_rejection_count[CQ])
1308
1309     logging.info('      Total CL actions: %d.', summary['total_cl_actions'])
1310     logging.info('    Unique CLs touched: %d.', summary['unique_cls'])
1311     logging.info('Unique patches touched: %d.', summary['unique_patches'])
1312     logging.info('   Total CLs submitted: %d.', summary['submitted_patches'])
1313     logging.info('      Total rejections: %d.', summary['rejections'])
1314     logging.info(' Total submit failures: %d.', summary['submit_fails'])
1315     logging.info(' Good patches rejected: %d.',
1316                  len(good_patch_rejections))
1317     logging.info('   Mean rejections per')
1318     logging.info('            good patch: %.2f',
1319                  summary['mean_good_patch_rejections'])
1320     logging.info(' False rejection rate for CQ: %.1f%%',
1321                  summary['false_rejection_rate'].get(CQ, 0))
1322     logging.info(' False rejection rate for Pre-CQ: %.1f%%',
1323                  summary['false_rejection_rate'].get(PRE_CQ, 0))
1324     logging.info(' Combined false rejection rate: %.1f%%',
1325                  summary['false_rejection_rate']['combined'])
1326
1327     for x, p in summary['good_patch_rejection_breakdown']:
1328       logging.info('%d good patches were rejected %d times.', p, x)
1329     logging.info('     Median good patch')
1330     logging.info('         handling time: %.2f hours',
1331                  summary['median_handling_time']/3600.0)
1332
1333     for bot_type, patches in summary['bad_cl_candidates'].items():
1334       logging.info('%d bad patch candidates were rejected by the %s',
1335                    len(patches), bot_type)
1336       for k in patches:
1337         logging.info('Bad patch candidate in: CL:%s%s',
1338                      constants.INTERNAL_CHANGE_PREFIX
1339                      if k.internal else constants.EXTERNAL_CHANGE_PREFIX,
1340                      k.gerrit_number)
1341
1342     fmt_fai = '  %(cnt)d failures in %(reason)s'
1343     fmt_rej = '  %(cnt)d rejections due to %(reason)s'
1344
1345     logging.info('Reasons why good patches were rejected:')
1346     self._PrintCounts(patch_reason_counts, fmt_rej)
1347
1348     logging.info('Bugs or CLs responsible for good patches rejections:')
1349     self._PrintCounts(patch_blame_counts, fmt_rej)
1350
1351     logging.info('Reasons why builds failed:')
1352     self._PrintCounts(build_reason_counts, fmt_fai)
1353
1354     logging.info('Stages from the Pre-CQ that caught real failures:')
1355     fmt = '  %(cnt)d broken patches were caught by %(reason)s'
1356     self._PrintCounts(correctly_rejected_by_stage.get(PRE_CQ, {}), fmt)
1357
1358     logging.info('Stages from the Pre-CQ that failed but succeeded on retry')
1359     fmt = '  %(cnt)d good patches failed incorrectly in %(reason)s'
1360     self._PrintCounts(incorrectly_rejected_by_stage.get(PRE_CQ, {}), fmt)
1361
1362     super_summary.update(summary)
1363     self.summary = super_summary
1364     return super_summary
1365
1366 # TODO(mtennant): Add token file support.  See upload_package_status.py.
1367 def _PrepareCreds(email, password=None):
1368   """Return a gdata_lib.Creds object from given credentials.
1369
1370   Args:
1371     email: Email address.
1372     password: Password string.  If not specified then a password
1373       prompt will be used.
1374
1375   Returns:
1376     A gdata_lib.Creds object.
1377   """
1378   creds = gdata_lib.Creds()
1379   creds.SetCreds(email, password)
1380   return creds
1381
1382
1383 def _CheckOptions(options):
1384   # Ensure that specified start date is in the past.
1385   now = datetime.datetime.now()
1386   if options.start_date and now.date() < options.start_date:
1387     cros_build_lib.Error('Specified start date is in the future: %s',
1388                          options.start_date)
1389     return False
1390
1391   # The --save option requires --email.
1392   if options.save and not options.email:
1393     cros_build_lib.Error('You must specify --email with --save.')
1394     return False
1395
1396   # The --cl-actions option requires --email.
1397   if options.cl_actions and not options.email:
1398     cros_build_lib.Error('You must specify --email with --cl-actions.')
1399     return False
1400
1401   return True
1402
1403
1404 def GetParser():
1405   """Creates the argparse parser."""
1406   parser = commandline.ArgumentParser(description=__doc__)
1407
1408   # Put options that control the mode of script into mutually exclusive group.
1409   mode = parser.add_mutually_exclusive_group(required=True)
1410   mode.add_argument('--cq-master', action='store_true', default=False,
1411                     help='Gather stats for the CQ master.')
1412   mode.add_argument('--pfq-master', action='store_true', default=False,
1413                     help='Gather stats for the PFQ master.')
1414   mode.add_argument('--pre-cq', action='store_true', default=False,
1415                     help='Gather stats for the Pre-CQ.')
1416   mode.add_argument('--cq-slaves', action='store_true', default=False,
1417                     help='Gather stats for all CQ slaves.')
1418   mode.add_argument('--cl-actions', action='store_true', default=False,
1419                     help='Gather stats about CL actions taken by the CQ '
1420                          'master')
1421   # TODO(mtennant): Other modes as they make sense, like maybe --release.
1422
1423   mode = parser.add_mutually_exclusive_group(required=True)
1424   mode.add_argument('--start-date', action='store', type='date', default=None,
1425                     help='Limit scope to a start date in the past.')
1426   mode.add_argument('--past-month', action='store_true', default=False,
1427                     help='Limit scope to the past 30 days up to now.')
1428   mode.add_argument('--past-week', action='store_true', default=False,
1429                     help='Limit scope to the past week up to now.')
1430   mode.add_argument('--past-day', action='store_true', default=False,
1431                     help='Limit scope to the past day up to now.')
1432
1433   parser.add_argument('--starting-build', action='store', type=int, default=0,
1434                       help='Filter to builds after given number (inclusive).')
1435
1436   parser.add_argument('--save', action='store_true', default=False,
1437                       help='Save results to DB, if applicable.')
1438   parser.add_argument('--email', action='store', type=str, default=None,
1439                       help='Specify email for Google Sheets account to use.')
1440
1441   mode = parser.add_argument_group('Advanced (use at own risk)')
1442   mode.add_argument('--no-upload', action='store_false', default=True,
1443                     dest='upload',
1444                     help='Skip uploading results to spreadsheet')
1445   mode.add_argument('--no-carbon', action='store_false', default=True,
1446                     dest='carbon',
1447                     help='Skip sending results to carbon/graphite')
1448   mode.add_argument('--no-mark-gathered', action='store_false', default=True,
1449                     dest='mark_gathered',
1450                     help='Skip marking results as gathered.')
1451   mode.add_argument('--no-sheets-version-filter', action='store_true',
1452                     default=False,
1453                     help='Upload all parsed metadata to spreasheet regardless '
1454                          'of sheets version.')
1455   mode.add_argument('--override-ss-key', action='store', default=None,
1456                     dest='ss_key',
1457                     help='Override spreadsheet key.')
1458
1459   return parser
1460
1461
1462 def main(argv):
1463   parser = GetParser()
1464   options = parser.parse_args(argv)
1465
1466   if not (_CheckOptions(options)):
1467     sys.exit(1)
1468
1469   # Determine the start date to use, which is required.
1470   if options.start_date:
1471     start_date = options.start_date
1472   else:
1473     assert options.past_month or options.past_week or options.past_day
1474     now = datetime.datetime.now()
1475     if options.past_month:
1476       start_date = (now - datetime.timedelta(days=30)).date()
1477     elif options.past_week:
1478       start_date = (now - datetime.timedelta(days=7)).date()
1479     else:
1480       start_date = (now - datetime.timedelta(days=1)).date()
1481
1482   # Prepare the rounds of stats gathering to do.
1483   stats_managers = []
1484
1485   if options.cq_master:
1486     stats_managers.append(
1487         CQMasterStats(
1488             ss_key=options.ss_key or CQ_SS_KEY,
1489             no_sheets_version_filter=options.no_sheets_version_filter))
1490
1491   if options.cl_actions:
1492     # CL stats manager uses the CQ spreadsheet to fetch failure reasons
1493     stats_managers.append(
1494         CLStats(
1495             options.email,
1496             ss_key=options.ss_key or CQ_SS_KEY,
1497             no_sheets_version_filter=options.no_sheets_version_filter))
1498
1499   if options.pfq_master:
1500     stats_managers.append(
1501         PFQMasterStats(
1502             ss_key=options.ss_key or PFQ_SS_KEY,
1503             no_sheets_version_filter=options.no_sheets_version_filter))
1504
1505   if options.pre_cq:
1506     # TODO(mtennant): Add spreadsheet and/or graphite support for pre-cq.
1507     stats_managers.append(PreCQStats())
1508
1509   if options.cq_slaves:
1510     targets = _GetSlavesOfMaster(CQ_MASTER)
1511     for target in targets:
1512       # TODO(mtennant): Add spreadsheet and/or graphite support for cq-slaves.
1513       stats_managers.append(CQSlaveStats(target))
1514
1515   # If options.save is set and any of the instructions include a table class,
1516   # or specify summary columns for upload, prepare spreadsheet creds object
1517   # early.
1518   creds = None
1519   if options.save and any((stats.UPLOAD_ROW_PER_BUILD or
1520                            stats.SUMMARY_SPREADSHEET_COLUMNS)
1521                           for stats in stats_managers):
1522     # TODO(mtennant): See if this can work with two-factor authentication.
1523     # TODO(mtennant): Eventually, we probably want to use 90-day certs to
1524     # run this as a cronjob on a ganeti instance.
1525     creds = _PrepareCreds(options.email)
1526
1527   # Now run through all the stats gathering that is requested.
1528   for stats_mgr in stats_managers:
1529     stats_mgr.Gather(start_date, starting_build_number=options.starting_build,
1530                      creds=creds)
1531     stats_mgr.Summarize()
1532
1533     if options.save:
1534       # Send data to spreadsheet, if applicable.
1535       if options.upload:
1536         stats_mgr.UploadToSheet(creds)
1537
1538       # Send data to Carbon/Graphite, if applicable.
1539       if options.carbon:
1540         stats_mgr.SendToCarbon()
1541
1542       # Mark these metadata.json files as processed.
1543       if options.mark_gathered:
1544         stats_mgr.MarkGathered()
1545
1546     cros_build_lib.Info('Finished with %s.\n\n', stats_mgr.config_target)
1547
1548
1549 # Background: This function logs the number of tryjob runs, both internal
1550 # and external, to Graphite.  It gets the data from git logs.  It was in
1551 # place, in a very different form, before the migration to
1552 # gather_builder_stats.  It is simplified here, but entirely untested and
1553 # not plumbed into gather_builder_stats anywhere.
1554 def GraphiteTryJobInfoUpToNow(internal, start_date):
1555   """Find the amount of tryjobs that finished on a particular day.
1556
1557   Args:
1558     internal: If true report for internal, if false report external.
1559     start_date: datetime.date object for date to start on.
1560   """
1561   carbon_lines = []
1562
1563   # Apparently scottz had 'trybot' and 'trybot-internal' checkouts in
1564   # his home directory which this code relied on.  Any new solution that
1565   # also relies on git logs will need a place to look for them.
1566   if internal:
1567     repo_path = '/some/path/to/trybot-internal'
1568     marker = 'internal'
1569   else:
1570     repo_path = '/some/path/to/trybot'
1571     marker = 'external'
1572
1573   # Make sure the trybot checkout is up to date.
1574   os.chdir(repo_path)
1575   cros_build_lib.RunCommand(['git', 'pull'], cwd=repo_path)
1576
1577   # Now get a list of datetime objects, in hourly deltas.
1578   now = datetime.datetime.now()
1579   start = datetime.datetime(start_date.year, start_date.month, start_date.day)
1580   hour_delta = datetime.timedelta(hours=1)
1581   end = start + hour_delta
1582   while end < now:
1583     git_cmd = ['git', 'log', '--since="%s"' % start,
1584                '--until="%s"' % end, '--name-only', '--pretty=format:']
1585     result = cros_build_lib.RunCommand(git_cmd, cwd=repo_path)
1586
1587     # Expect one line per tryjob run in the specified hour.
1588     count = len(l for l in result.output.splitlines() if l.strip())
1589
1590     carbon_lines.append('buildbot.tryjobs.%s.hourly %s %s' %
1591                         (marker, count, (end - EPOCH_START).total_seconds()))
1592
1593   graphite.SendToCarbon(carbon_lines)