#!/usr/bin/env python
# Copyright 2015 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Detect new flakes and create issues for them"""
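
# Example invocations (flags as defined in build_args_parser below; the token
# value is hypothetical):
#
#   Count new flakes over the default 1-day reporting window:
#     python detect_new_failures.py --count_only
#
#   Interactively file GitHub issues for each new flake:
#     python detect_new_failures.py --create_issues --token=<github-token>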

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import json
import logging
import os
import pprint
import sys
import urllib
import urllib2
from collections import namedtuple

gcp_utils_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '../gcp/utils'))
sys.path.append(gcp_utils_dir)

import big_query_utils

GH_ISSUE_CREATION_URL = 'https://api.github.com/repos/grpc/grpc/issues'
GH_ISSUE_SEARCH_URL = 'https://api.github.com/search/issues'
KOKORO_BASE_URL = 'https://kokoro2.corp.google.com/job/'


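# Thin wrapper for authenticated GitHub API calls: issues a GET, or a POST of
# JSON when `data` is given, using the module-level TOKEN set in main().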
def gh(url, data=None):
    request = urllib2.Request(url, data=data)
    assert TOKEN
    request.add_header('Authorization', 'token {}'.format(TOKEN))
    if data:
        request.add_header('Content-type', 'application/json')
    response = urllib2.urlopen(request)
    if 200 <= response.getcode() < 300:
        return json.loads(response.read())
    else:
        raise ValueError('Error ({}) accessing {}'.format(
            response.getcode(), response.geturl()))


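# Queries the GitHub search API for existing grpc/grpc issues matching
# search_term, so that duplicates can be surfaced before filing a new issue.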
def search_gh_issues(search_term, status='open'):
    params = ' '.join(
        (search_term, 'is:issue', 'is:{}'.format(status), 'repo:grpc/grpc'))
    qargs = urllib.urlencode({'q': params})
    url = '?'.join((GH_ISSUE_SEARCH_URL, qargs))
    response = gh(url)
    return response


def create_gh_issue(title, body, labels, assignees=None):
    params = {'title': title, 'body': body, 'labels': labels}
    if assignees:
        params['assignees'] = assignees
    data = json.dumps(params)
    response = gh(GH_ISSUE_CREATION_URL, data)
    issue_url = response['html_url']
    print('Created issue {} for {}'.format(issue_url, title))


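# Rebuilds the Kokoro job URL from the job name and build id. For a
# hypothetical job_name 'grpc/core/master' and build_id '1234' this yields
# 'https://kokoro2.corp.google.com/job/grpc/job/core/job/master/1234'.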
def build_kokoro_url(job_name, build_id):
    job_path = '{}/{}'.format('/job/'.join(job_name.split('/')), build_id)
    return KOKORO_BASE_URL + job_path


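# Files a GitHub issue for each new failure. Unless always_create is set, the
# user is first shown any preexisting open issues that mention the test and is
# prompted interactively before each issue is created.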
def create_issues(new_flakes, always_create):
    for test_name, results_row in new_flakes.items():
        poll_strategy, job_name, build_id, timestamp = results_row
        # TODO(dgq): the Kokoro URL has a limited lifetime. The permanent and ideal
        # URL would be the sponge one, but there's currently no easy way to retrieve
        # it.
        url = build_kokoro_url(job_name, build_id)
        title = 'New Failure: ' + test_name
        body = '- Test: {}\n- Poll Strategy: {}\n- URL: {}'.format(
            test_name, poll_strategy, url)
        labels = ['infra/New Failure']
        if always_create:
            proceed = True
        else:
            preexisting_issues = search_gh_issues(test_name)
            if preexisting_issues['total_count'] > 0:
                print('\nFound {} issues for "{}":'.format(
                    preexisting_issues['total_count'], test_name))
                for issue in preexisting_issues['items']:
                    print('\t"{}" ; URL: {}'.format(issue['title'],
                                                    issue['html_url']))
            else:
                print(
                    '\nNo preexisting issues found for "{}"'.format(test_name))
            proceed = raw_input(
                'Create issue for:\nTitle: {}\nBody: {}\n[Y/n] '.format(
                    title, body)) in ('y', 'Y', '')
        if proceed:
            assignees_str = raw_input(
                'Assignees? (comma-separated, leave blank for unassigned): ')
            assignees = [
                assignee.strip() for assignee in assignees_str.split(',')
                if assignee.strip()
            ]
            create_gh_issue(title, body, labels, assignees)


def print_table(table, format):
    first_time = True
    for test_name, results_row in table.items():
        poll_strategy, job_name, build_id, timestamp = results_row
        full_kokoro_url = build_kokoro_url(job_name, build_id)
        if format == 'human':
            print("\t- Test: {}, Polling: {}, Timestamp: {}, url: {}".format(
                test_name, poll_strategy, timestamp, full_kokoro_url))
        else:
            assert (format == 'csv')
            if first_time:
                print('test,timestamp,url')
                first_time = False
            print("{},{},{}".format(test_name, timestamp, full_kokoro_url))


Row = namedtuple('Row', ['poll_strategy', 'job_name', 'build_id', 'timestamp'])


def get_new_failures(dates):
    bq = big_query_utils.create_big_query()
    this_script_path = os.path.join(os.path.dirname(__file__))
    sql_script = os.path.join(this_script_path, 'sql/new_failures_24h.sql')
    with open(sql_script) as query_file:
        query = query_file.read().format(
            calibration_begin=dates['calibration']['begin'],
            calibration_end=dates['calibration']['end'],
            reporting_begin=dates['reporting']['begin'],
            reporting_end=dates['reporting']['end'])
    logging.debug("Query:\n%s", query)
    query_job = big_query_utils.sync_query_job(bq, 'grpc-testing', query)
    page = bq.jobs().getQueryResults(
        pageToken=None, **query_job['jobReference']).execute(num_retries=3)
    rows = page.get('rows')
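    # BigQuery's REST API returns each row as {'f': [{'v': <value>}, ...]},
    # one entry per selected column; the indices below assume the column order
    # produced by sql/new_failures_24h.sql: test name, poll strategy, job
    # name, build id, timestamp.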
    if rows:
        return {
            row['f'][0]['v']: Row(poll_strategy=row['f'][1]['v'],
                                  job_name=row['f'][2]['v'],
                                  build_id=row['f'][3]['v'],
                                  timestamp=row['f'][4]['v']) for row in rows
        }
    else:
        return {}


def parse_isodate(date_str):
    return datetime.datetime.strptime(date_str, "%Y-%m-%d").date()


def get_new_flakes(args):
    """Returns new failures detected during the reporting window.

    The date windows are derived from today's date (see process_date_args):
    the "calibration" window, spanning "calibration_days", establishes the set
    of pre-existing flakes; the "reporting" window that follows it, spanning
    "reporting_days", is the period during which new flakes are reported.

      |---- calibration_days ----|---- reporting_days ----|
      ^                          ^                        ^
      calibration begin          calibration end /        reporting end
                                 reporting begin
    """
    dates = process_date_args(args)
    new_failures = get_new_failures(dates)
    logging.info('|new failures| = %d', len(new_failures))
    return new_failures


def build_args_parser():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--calibration_days',
        type=int,
        default=7,
        help='How many days to consider for pre-existing flakes.')
    parser.add_argument(
        '--reporting_days',
        type=int,
        default=1,
        help='How many days to consider for the detection of new flakes.')
    parser.add_argument('--count_only',
                        dest='count_only',
                        action='store_true',
                        help='Display only the number of new flakes.')
    parser.set_defaults(count_only=False)
    parser.add_argument('--create_issues',
                        dest='create_issues',
                        action='store_true',
                        help='Create issues for all new flakes.')
    parser.set_defaults(create_issues=False)
    parser.add_argument(
        '--always_create_issues',
        dest='always_create_issues',
        action='store_true',
        help='Always create issues for all new flakes. Otherwise,'
        ' interactively prompt for every issue.')
    parser.set_defaults(always_create_issues=False)
    parser.add_argument(
        '--token',
        type=str,
        default='',
        help='GitHub token to use its API with a higher rate limit.')
    parser.add_argument('--format',
                        type=str,
                        choices=['human', 'csv'],
                        default='human',
                        help='Output format: are you a human or a machine?')
    parser.add_argument(
        '--loglevel',
        type=str,
        choices=['INFO', 'DEBUG', 'WARNING', 'ERROR', 'CRITICAL'],
        default='WARNING',
        help='Logging level.')
    return parser


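# Builds the calibration and reporting windows counting back from today. For
# example (hypothetical date), with the defaults --calibration_days=7 and
# --reporting_days=1 and today == 2020-02-10, calibration covers 2020-02-02
# through 2020-02-09 and reporting covers 2020-02-09 through 2020-02-10.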
def process_date_args(args):
    calibration_begin = (datetime.date.today() -
                         datetime.timedelta(days=args.calibration_days) -
                         datetime.timedelta(days=args.reporting_days))
    calibration_end = calibration_begin + datetime.timedelta(
        days=args.calibration_days)
    reporting_begin = calibration_end
    reporting_end = reporting_begin + datetime.timedelta(
        days=args.reporting_days)
    return {
        'calibration': {
            'begin': calibration_begin,
            'end': calibration_end
        },
        'reporting': {
            'begin': reporting_begin,
            'end': reporting_end
        }
    }


def main():
    global TOKEN
    args_parser = build_args_parser()
    args = args_parser.parse_args()
    if args.create_issues and not args.token:
        raise ValueError(
            'Missing --token argument, needed to create GitHub issues')
    TOKEN = args.token

    logging_level = getattr(logging, args.loglevel)
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging_level)
    new_flakes = get_new_flakes(args)

    dates = process_date_args(args)

    dates_info_string = 'from {} until {} (calibrated from {} until {})'.format(
        dates['reporting']['begin'].isoformat(),
        dates['reporting']['end'].isoformat(),
        dates['calibration']['begin'].isoformat(),
        dates['calibration']['end'].isoformat())

    if args.format == 'human':
        if args.count_only:
            print(len(new_flakes), dates_info_string)
        elif new_flakes:
            found_msg = 'Found {} new flakes {}'.format(len(new_flakes),
                                                        dates_info_string)
            print(found_msg)
            print('*' * len(found_msg))
            print_table(new_flakes, 'human')
            if args.create_issues:
                create_issues(new_flakes, args.always_create_issues)
        else:
            print('No new flakes found', dates_info_string)
    elif args.format == 'csv':
        if args.count_only:
            print('from_date,to_date,count')
            print('{},{},{}'.format(dates['reporting']['begin'].isoformat(),
                                    dates['reporting']['end'].isoformat(),
                                    len(new_flakes)))
        else:
            print_table(new_flakes, 'csv')
    else:
        raise ValueError('Invalid argument for --format: {}'.format(
            args.format))


if __name__ == '__main__':
    main()