#!/usr/bin/env python
# Copyright 2015 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Detect new flakes and create issues for them"""
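
# Example invocations (flags as defined in build_args_parser below; the token
# value is hypothetical):
#
#   Count new flakes over the default 1-day reporting window:
#     python detect_new_failures.py --count_only
#
#   Interactively file GitHub issues for each new flake:
#     python detect_new_failures.py --create_issues --token=<github-token>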

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import json
import logging
import os
import pprint
import sys
import urllib
import urllib2
from collections import namedtuple

gcp_utils_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '../gcp/utils'))
sys.path.append(gcp_utils_dir)

import big_query_utils

GH_ISSUE_CREATION_URL = 'https://api.github.com/repos/grpc/grpc/issues'
GH_ISSUE_SEARCH_URL = 'https://api.github.com/search/issues'
KOKORO_BASE_URL = 'https://kokoro2.corp.google.com/job/'


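# Thin wrapper for authenticated GitHub API calls: issues a GET, or a POST of
# JSON when `data` is given, using the module-level TOKEN set in main().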
def gh(url, data=None):
    request = urllib2.Request(url, data=data)
    assert TOKEN
    request.add_header('Authorization', 'token {}'.format(TOKEN))
    if data:
        request.add_header('Content-type', 'application/json')
    response = urllib2.urlopen(request)
    if 200 <= response.getcode() < 300:
        return json.loads(response.read())
    else:
        raise ValueError('Error ({}) accessing {}'.format(
            response.getcode(), response.geturl()))


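# Queries the GitHub search API for existing grpc/grpc issues matching
# search_term, so that duplicates can be surfaced before filing a new issue.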
def search_gh_issues(search_term, status='open'):
    params = ' '.join(
        (search_term, 'is:issue', 'is:{}'.format(status), 'repo:grpc/grpc'))
    qargs = urllib.urlencode({'q': params})
    url = '?'.join((GH_ISSUE_SEARCH_URL, qargs))
    response = gh(url)
    return response


def create_gh_issue(title, body, labels, assignees=None):
    params = {'title': title, 'body': body, 'labels': labels}
    if assignees:
        params['assignees'] = assignees
    data = json.dumps(params)
    response = gh(GH_ISSUE_CREATION_URL, data)
    issue_url = response['html_url']
    print('Created issue {} for {}'.format(issue_url, title))


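# Rebuilds the Kokoro job URL from the job name and build id. For a
# hypothetical job_name 'grpc/core/master' and build_id '1234' this yields
# 'https://kokoro2.corp.google.com/job/grpc/job/core/job/master/1234'.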
def build_kokoro_url(job_name, build_id):
    job_path = '{}/{}'.format('/job/'.join(job_name.split('/')), build_id)
    return KOKORO_BASE_URL + job_path


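# Files a GitHub issue for each new failure. Unless always_create is set, the
# user is first shown any preexisting open issues that mention the test and is
# prompted interactively before each issue is created.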
def create_issues(new_flakes, always_create):
    for test_name, results_row in new_flakes.items():
        poll_strategy, job_name, build_id, timestamp = results_row
        # TODO(dgq): the Kokoro URL has a limited lifetime. The permanent and ideal
        # URL would be the sponge one, but there's currently no easy way to retrieve
        # it.
        url = build_kokoro_url(job_name, build_id)
        title = 'New Failure: ' + test_name
        body = '- Test: {}\n- Poll Strategy: {}\n- URL: {}'.format(
            test_name, poll_strategy, url)
        labels = ['infra/New Failure']
        if always_create:
            proceed = True
        else:
            preexisting_issues = search_gh_issues(test_name)
            if preexisting_issues['total_count'] > 0:
                print('\nFound {} issues for "{}":'.format(
                    preexisting_issues['total_count'], test_name))
                for issue in preexisting_issues['items']:
                    print('\t"{}" ; URL: {}'.format(issue['title'],
                                                    issue['html_url']))
            else:
                print(
                    '\nNo preexisting issues found for "{}"'.format(test_name))
            proceed = raw_input(
                'Create issue for:\nTitle: {}\nBody: {}\n[Y/n] '.format(
                    title, body)) in ('y', 'Y', '')
        if proceed:
            assignees_str = raw_input(
                'Assignees? (comma-separated, leave blank for unassigned): ')
            assignees = [
                assignee.strip() for assignee in assignees_str.split(',')
                if assignee.strip()
            ]
            create_gh_issue(title, body, labels, assignees)


def print_table(table, format):
    first_time = True
    for test_name, results_row in table.items():
        poll_strategy, job_name, build_id, timestamp = results_row
        full_kokoro_url = build_kokoro_url(job_name, build_id)
        if format == 'human':
            print("\t- Test: {}, Polling: {}, Timestamp: {}, url: {}".format(
                test_name, poll_strategy, timestamp, full_kokoro_url))
        else:
            assert (format == 'csv')
            if first_time:
                print('test,timestamp,url')
                first_time = False
            print("{},{},{}".format(test_name, timestamp, full_kokoro_url))


Row = namedtuple('Row', ['poll_strategy', 'job_name', 'build_id', 'timestamp'])


def get_new_failures(dates):
    bq = big_query_utils.create_big_query()
    this_script_path = os.path.join(os.path.dirname(__file__))
    sql_script = os.path.join(this_script_path, 'sql/new_failures_24h.sql')
    with open(sql_script) as query_file:
        query = query_file.read().format(
            calibration_begin=dates['calibration']['begin'],
            calibration_end=dates['calibration']['end'],
            reporting_begin=dates['reporting']['begin'],
            reporting_end=dates['reporting']['end'])
    logging.debug("Query:\n%s", query)
    query_job = big_query_utils.sync_query_job(bq, 'grpc-testing', query)
    page = bq.jobs().getQueryResults(
        pageToken=None, **query_job['jobReference']).execute(num_retries=3)
    rows = page.get('rows')
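    # BigQuery's REST API returns each row as {'f': [{'v': <value>}, ...]},
    # one entry per selected column; the indices below assume the column order
    # produced by sql/new_failures_24h.sql: test name, poll strategy, job
    # name, build id, timestamp.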
    if rows:
        return {
            row['f'][0]['v']: Row(poll_strategy=row['f'][1]['v'],
                                  job_name=row['f'][2]['v'],
                                  build_id=row['f'][3]['v'],
                                  timestamp=row['f'][4]['v']) for row in rows
        }
    else:
        return {}


def parse_isodate(date_str):
    return datetime.datetime.strptime(date_str, "%Y-%m-%d").date()


def get_new_flakes(args):
    """Returns new failures detected during the reporting window.

    The date windows are derived from today's date (see process_date_args):
    the "calibration" window, spanning "calibration_days", establishes the set
    of pre-existing flakes; the "reporting" window that follows it, spanning
    "reporting_days", is the period during which new flakes are reported.

      |---- calibration_days ----|---- reporting_days ----|
      ^                          ^                        ^
      calibration begin          calibration end /        reporting end
                                 reporting begin
    """
    dates = process_date_args(args)
    new_failures = get_new_failures(dates)
    logging.info('|new failures| = %d', len(new_failures))
    return new_failures


def build_args_parser():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--calibration_days',
        type=int,
        default=7,
        help='How many days to consider for pre-existing flakes.')
    parser.add_argument(
        '--reporting_days',
        type=int,
        default=1,
        help='How many days to consider for the detection of new flakes.')
    parser.add_argument('--count_only',
                        dest='count_only',
                        action='store_true',
                        help='Display only the number of new flakes.')
    parser.set_defaults(count_only=False)
    parser.add_argument('--create_issues',
                        dest='create_issues',
                        action='store_true',
                        help='Create issues for all new flakes.')
    parser.set_defaults(create_issues=False)
    parser.add_argument(
        '--always_create_issues',
        dest='always_create_issues',
        action='store_true',
        help='Always create issues for all new flakes. Otherwise,'
        ' interactively prompt for every issue.')
    parser.set_defaults(always_create_issues=False)
    parser.add_argument(
        '--token',
        type=str,
        default='',
        help='GitHub token to use its API with a higher rate limit.')
    parser.add_argument('--format',
                        type=str,
                        choices=['human', 'csv'],
                        default='human',
                        help='Output format: are you a human or a machine?')
    parser.add_argument(
        '--loglevel',
        type=str,
        choices=['INFO', 'DEBUG', 'WARNING', 'ERROR', 'CRITICAL'],
        default='WARNING',
        help='Logging level.')
    return parser


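# Builds the calibration and reporting windows counting back from today. For
# example (hypothetical date), with the defaults --calibration_days=7 and
# --reporting_days=1 and today == 2020-02-10, calibration covers 2020-02-02
# through 2020-02-09 and reporting covers 2020-02-09 through 2020-02-10.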
def process_date_args(args):
    calibration_begin = (datetime.date.today() -
                         datetime.timedelta(days=args.calibration_days) -
                         datetime.timedelta(days=args.reporting_days))
    calibration_end = calibration_begin + datetime.timedelta(
        days=args.calibration_days)
    reporting_begin = calibration_end
    reporting_end = reporting_begin + datetime.timedelta(
        days=args.reporting_days)
    return {
        'calibration': {
            'begin': calibration_begin,
            'end': calibration_end
        },
        'reporting': {
            'begin': reporting_begin,
            'end': reporting_end
        }
    }


def main():
    global TOKEN
    args_parser = build_args_parser()
    args = args_parser.parse_args()
    if args.create_issues and not args.token:
        raise ValueError(
            'Missing --token argument, needed to create GitHub issues')
    TOKEN = args.token

    logging_level = getattr(logging, args.loglevel)
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging_level)
    new_flakes = get_new_flakes(args)

    dates = process_date_args(args)

    dates_info_string = 'from {} until {} (calibrated from {} until {})'.format(
        dates['reporting']['begin'].isoformat(),
        dates['reporting']['end'].isoformat(),
        dates['calibration']['begin'].isoformat(),
        dates['calibration']['end'].isoformat())

    if args.format == 'human':
        if args.count_only:
            print(len(new_flakes), dates_info_string)
        elif new_flakes:
            found_msg = 'Found {} new flakes {}'.format(len(new_flakes),
                                                        dates_info_string)
            print(found_msg)
            print('*' * len(found_msg))
            print_table(new_flakes, 'human')
            if args.create_issues:
                create_issues(new_flakes, args.always_create_issues)
        else:
            print('No new flakes found', dates_info_string)
    elif args.format == 'csv':
        if args.count_only:
            print('from_date,to_date,count')
            print('{},{},{}'.format(dates['reporting']['begin'].isoformat(),
                                    dates['reporting']['end'].isoformat(),
                                    len(new_flakes)))
        else:
            print_table(new_flakes, 'csv')
    else:
        raise ValueError('Invalid argument for --format: {}'.format(
            args.format))


if __name__ == '__main__':
    main()