Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / gpu / tools / check_gpu_bots.py
1 #!/usr/bin/env python
2
3 # Copyright 2014 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
6
7 import argparse
8 import datetime
9 import getpass
10 import json
11 import os
12 import smtplib
13 import sys
14 import time
15 import urllib
16 import urllib2
17
class Emailer:
  """Sends notification emails through the SMTP server in GMAIL_SMTP_SERVER.

  The password of the sending account is resolved once, at construction time:
  it is read from a file when that file exists, otherwise the user is prompted.
  """
  DEFAULT_EMAIL_PASSWORD_FILE = '.email_password'
  GMAIL_SMTP_SERVER = 'smtp.gmail.com:587'
  SUBJECT = 'Chrome GPU Bots Notification'

  def __init__(self, email_from, email_to, email_password_file):
    """Store the addresses and resolve the password.

    Args:
      email_from: address used both as sender and as SMTP login name.
      email_to: iterable of recipient addresses (joined with ',' in the
          'To:' header).
      email_password_file: path of the password file, or None to use
          DEFAULT_EMAIL_PASSWORD_FILE.
    """
    self.email_from = email_from
    self.email_to = email_to
    self.email_password = Emailer._getEmailPassword(email_password_file)

  @staticmethod
  def format_email_body(time_str, offline_str, failed_str, noteworthy_str):
    """Return the four report sections concatenated in the given order."""
    return '%s%s%s%s' % (time_str, offline_str, failed_str, noteworthy_str)

  def send_email(self, body):
    """Send |body| to all recipients.

    Best effort: any failure is printed and swallowed so that a mail problem
    never aborts the caller.
    """
    message = 'From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s' % (
        self.email_from, ','.join(self.email_to), Emailer.SUBJECT, body)

    try:
      server = self._openSession()
      try:
        server.sendmail(self.email_from, self.email_to, message)
      finally:
        # Always release the connection, even when sendmail() raised.
        Emailer._closeSession(server)
    except Exception as e:
      print('Error sending email: %s' % str(e))

  def testEmailLogin(self):
    """Log in and out once; raises if connecting or logging in fails."""
    Emailer._closeSession(self._openSession())

  def _openSession(self):
    """Return a connected, TLS-upgraded and logged-in smtplib.SMTP object.

    The socket is closed before re-raising if the handshake or login fails,
    so a failed attempt does not leak a connection.
    """
    server = smtplib.SMTP(Emailer.GMAIL_SMTP_SERVER)
    try:
      server.starttls()
      server.login(self.email_from, self.email_password)
    except Exception:
      server.close()
      raise
    return server

  @staticmethod
  def _closeSession(server):
    """End the SMTP session politely, falling back to closing the socket."""
    try:
      server.quit()
    except smtplib.SMTPException:
      # The server is already gone; make sure the socket is released and do
      # not let this secondary error hide the one that caused it.
      server.close()

  @staticmethod
  def _getEmailPassword(email_password_file):
    """Return the password from the file, or prompt for it.

    Args:
      email_password_file: path to read, or None for
          DEFAULT_EMAIL_PASSWORD_FILE.

    Returns:
      The file content stripped of surrounding whitespace when the file
      exists, otherwise whatever the user types at the getpass prompt.
    """
    password_file = (email_password_file if email_password_file is not None
                     else Emailer.DEFAULT_EMAIL_PASSWORD_FILE)

    if os.path.isfile(password_file):
      with open(password_file, 'r') as f:
        return f.read().strip()

    return getpass.getpass(
        'Please enter email password for source email account: ')
66
class GpuBot:
  """Record describing one bot: its identity, last run and failure details."""

  # Optional attributes copied verbatim between the object and its dict form.
  _OPTIONAL_FIELDS = ('failure_string', 'bot_url', 'build_url')

  def __init__(self, waterfall_name, bot_name, bot_data):
    """Create a bot with no end time, failure string or URLs set yet.

    Args:
      waterfall_name: name of the waterfall the bot belongs to.
      bot_name: name of the bot on that waterfall.
      bot_data: raw data associated with the bot, or None.
    """
    self.waterfall_name = waterfall_name
    self.bot_name = bot_name
    self.bot_data = bot_data
    self._end_time = None
    self._hours_since_last_run = None
    self.failure_string = None
    self.bot_url = None
    self.build_url = None

  def getEndTime(self):
    """Return the end time given to setEndTime(), or None."""
    return self._end_time

  def setEndTime(self, end_time):
    """Record |end_time| and how many hours lie between it and now."""
    now = time.localtime()
    self._end_time = end_time
    self._hours_since_last_run = roughTimeDiffInHours(end_time, now)

  def getHoursSinceLastRun(self):
    """Return the hours computed by setEndTime(), or None."""
    return self._hours_since_last_run

  def toDict(self):
    """Return a plain dict holding the bot's identity and every set field."""
    result = {'waterfall_name': self.waterfall_name,
              'bot_name': self.bot_name}

    if self._end_time is not None:
      result['end_time'] = serialTime(self._end_time)
      result['hours_since_last_run'] = self._hours_since_last_run

    for field in GpuBot._OPTIONAL_FIELDS:
      value = getattr(self, field)
      if value is not None:
        result[field] = value

    return result

  @staticmethod
  def fromDict(dict):
    """Rebuild a GpuBot (with bot_data set to None) from a toDict() result."""
    restored = GpuBot(dict['waterfall_name'], dict['bot_name'], None)

    if 'end_time' in dict:
      restored._end_time = unserializeTime(dict['end_time'])

    # Missing keys leave the attribute at its constructor default of None.
    restored._hours_since_last_run = dict.get('hours_since_last_run')
    restored.failure_string = dict.get('failure_string')
    restored.bot_url = dict.get('bot_url')
    restored.build_url = dict.get('build_url')

    return restored
127
def errorNoMostRecentBuild(waterfall_name, bot_name):
  """Print a notice that no most recent build exists for the given bot."""
  bot_id = '%s::%s' % (waterfall_name, bot_name)
  print('No most recent build available: ' + bot_id)
130
class Waterfall:
  """Queries the build.chromium.org JSON endpoints for the GPU bots.

  All methods are static; the class groups the URL templates, the lists of
  waterfalls/builders to inspect and the queries built on top of them.
  """
  BASE_URL = 'http://build.chromium.org/p/'
  BASE_BUILD_URL = BASE_URL + '%s/builders/%s'
  SPECIFIC_BUILD_URL = BASE_URL + '%s/builders/%s/builds/%s'
  BASE_JSON_BUILDERS_URL = BASE_URL + '%s/json/builders'
  BASE_JSON_BUILDS_URL = BASE_URL + '%s/json/builders/%s/builds'
  # Waterfalls on which every builder is inspected.
  REGULAR_WATERFALLS = ['chromium.gpu',
                        'tryserver.chromium.gpu',
                        'chromium.gpu.fyi']
  # Builders selected on the waterfalls listed in FILTERED_WATERFALLS.
  WEBKIT_GPU_BOTS = ['GPU Win Builder',
                     'GPU Win Builder (dbg)',
                     'GPU Win7 (NVIDIA)',
                     'GPU Win7 (dbg) (NVIDIA)',
                     'GPU Mac Builder',
                     'GPU Mac Builder (dbg)',
                     'GPU Mac10.7',
                     'GPU Mac10.7 (dbg)',
                     'GPU Linux Builder',
                     'GPU Linux Builder (dbg)',
                     'GPU Linux (NVIDIA)',
                     'GPU Linux (dbg) (NVIDIA)']
  # (waterfall name, builder names) pairs: only the named builders are queried.
  FILTERED_WATERFALLS = [('chromium.webkit', WEBKIT_GPU_BOTS)]

  @staticmethod
  def getJsonFromUrl(url):
    """Fetch |url| and return its body parsed as JSON.

    The connection is closed even when reading or parsing raises.
    """
    conn = urllib2.urlopen(url)
    try:
      return json.loads(conn.read())
    finally:
      conn.close()

  @staticmethod
  def getBuildersJsonForWaterfall(waterfall):
    """Return the builders JSON of |waterfall|, requested with '?filter'."""
    querystring = '?filter'
    return Waterfall.getJsonFromUrl(
        (Waterfall.BASE_JSON_BUILDERS_URL + '%s') % (waterfall, querystring))

  @staticmethod
  def getLastNBuildsForBuilder(n, waterfall, builder):
    """Return the JSON for builds -1 .. -n of |builder|, or {} when n <= 0."""
    if n <= 0:
      return {}

    selects = ''.join('select=-%d&' % (i + 1) for i in range(n))
    querystring = '?' + selects + 'filter'

    return Waterfall.getJsonFromUrl(
        (Waterfall.BASE_JSON_BUILDS_URL + '%s') %
        (waterfall, urllib.quote(builder), querystring))

  @staticmethod
  def getFilteredBuildersJsonForWaterfall(waterfall, filter):
    """Return the builders JSON of |waterfall| for the names in |filter|."""
    selects = ''.join('select=%s&' % urllib.quote(bot_name)
                      for bot_name in filter)
    querystring = '?' + selects + 'filter'

    return Waterfall.getJsonFromUrl(
        (Waterfall.BASE_JSON_BUILDERS_URL + '%s') % (waterfall, querystring))

  @staticmethod
  def getAllGpuBots():
    """Return {waterfall name: builders JSON} for every configured waterfall."""
    allbots = {k: Waterfall.getBuildersJsonForWaterfall(k)
               for k in Waterfall.REGULAR_WATERFALLS}

    filteredbots = {
        name: Waterfall.getFilteredBuildersJsonForWaterfall(name, bot_names)
        for name, bot_names in Waterfall.FILTERED_WATERFALLS}

    allbots.update(filteredbots)

    return allbots

  @staticmethod
  def _newGpuBot(waterfall_name, bot_name, bot_data):
    """Return a GpuBot for the builder with its bot_url already filled in."""
    gpu_bot = GpuBot(waterfall_name, bot_name, bot_data)
    gpu_bot.bot_url = Waterfall.BASE_BUILD_URL % (waterfall_name,
                                                  urllib.quote(bot_name))
    return gpu_bot

  @staticmethod
  def getOfflineBots(bots):
    """Return a GpuBot for every builder whose 'state' is 'offline'.

    Args:
      bots: mapping as returned by getAllGpuBots().

    Returns:
      List of GpuBot objects. The end time is set from the second entry of the
      most recent completed build's 'times'; when that is unavailable a notice
      is printed and the bot is still returned, without an end time.
    """
    offline_bots = []

    for waterfall_name in bots:
      waterfall = bots[waterfall_name]

      for bot_name in waterfall:
        bot = waterfall[bot_name]

        if bot.get('state') != 'offline':
          continue

        gpu_bot = Waterfall._newGpuBot(waterfall_name, bot_name, bot)

        most_recent_build = Waterfall.getMostRecentlyCompletedBuildForBot(
            gpu_bot)
        times = most_recent_build.get('times') if most_recent_build else None

        # time.localtime(None) returns the current time, which would make an
        # offline bot look as if it had just run; treat a missing end time
        # like a missing build instead.
        if times and len(times) > 1 and times[1] is not None:
          gpu_bot.setEndTime(time.localtime(times[1]))
        else:
          errorNoMostRecentBuild(waterfall_name, bot_name)

        offline_bots.append(gpu_bot)

    return offline_bots

  @staticmethod
  def _firstCompletedBuild(builds, num_builds):
    """Return the newest build in |builds| that has a result, or None.

    Args:
      builds: mapping keyed '-1', '-2', ... (newest first).
      num_builds: how many of those keys to look at.

    Entries that are missing, are not dicts, or whose 'results' is absent or
    None (e.g. builds still in progress) are skipped.
    """
    for i in range(num_builds):
      build = builds.get('-%d' % (i + 1))

      if isinstance(build, dict) and build.get('results') is not None:
        return build

    return None

  @staticmethod
  def getMostRecentlyCompletedBuildForBot(bot):
    """Return the most recent completed build of |bot|, or None.

    A build already stored under bot.bot_data['most_recent_build'] is
    returned as is; otherwise the result found here is stored there (when
    bot_data is not None) so that later calls sharing the same bot_data do
    not query the server again.
    """
    if bot.bot_data is not None and 'most_recent_build' in bot.bot_data:
      return bot.bot_data['most_recent_build']

    # Unfortunately, the JSON API doesn't provide a "most recent completed
    # build" call. We just have to get some number of the most recent
    # (including current, in-progress builds) and give up if that's not
    # enough.
    NUM_BUILDS = 10
    builds = Waterfall.getLastNBuildsForBuilder(NUM_BUILDS, bot.waterfall_name,
                                                bot.bot_name)

    completed = Waterfall._firstCompletedBuild(builds, NUM_BUILDS)

    if completed is not None and bot.bot_data is not None:
      bot.bot_data['most_recent_build'] = completed

    return completed

  @staticmethod
  def getFailedBots(bots):
    """Return a GpuBot for every builder whose last completed build failed.

    Args:
      bots: mapping as returned by getAllGpuBots().

    Returns:
      List of GpuBot objects with failure_string and build_url set. A build
      counts as failed when 'failed' is one of the entries of its 'text'.
      Builders without any completed build are reported with a printed
      notice and left out.
    """
    failed_bots = []

    for waterfall_name in bots:
      waterfall = bots[waterfall_name]

      for bot_name in waterfall:
        gpu_bot = Waterfall._newGpuBot(waterfall_name, bot_name,
                                       waterfall[bot_name])

        most_recent_build = Waterfall.getMostRecentlyCompletedBuildForBot(
            gpu_bot)
        text = most_recent_build.get('text') if most_recent_build else None

        if text and 'failed' in text:
          gpu_bot.failure_string = ' '.join(text)
          gpu_bot.build_url = Waterfall.SPECIFIC_BUILD_URL % (
              waterfall_name, urllib.quote(bot_name),
              most_recent_build['number'])
          failed_bots.append(gpu_bot)
        elif not most_recent_build:
          errorNoMostRecentBuild(waterfall_name, bot_name)

    return failed_bots
287
def formatTime(t):
  """Return |t| formatted like 'Thu, 02 Jan 2014 03:04:05'."""
  display_format = "%a, %d %b %Y %H:%M:%S"
  return time.strftime(display_format, t)
290
def roughTimeDiffInHours(t1, t2):
  """Return the absolute difference between two struct_times, in hours.

  Only the year, month, day, hour, minute and second fields are used; the
  result is a non-negative float regardless of argument order.
  """
  def toDatetime(t):
    return datetime.datetime(t.tm_year, t.tm_mon, t.tm_mday,
                             t.tm_hour, t.tm_min, t.tm_sec)

  elapsed = toDatetime(t1) - toDatetime(t2)
  return abs(float(elapsed.total_seconds()) / 3600.0)
303
def getBotStr(bot):
  """Return an indented, multi-line description of |bot|.

  The first line is always 'waterfall::bot'; the failure, last build end time,
  bot URL and build URL lines are emitted only for values that are not None.
  The description ends with an empty line.
  """
  lines = ['  %s::%s\n' % (bot.waterfall_name, bot.bot_name)]

  if bot.failure_string is not None:
    lines.append('  failure: %s\n' % bot.failure_string)

  end_time = bot.getEndTime()
  if end_time is not None:
    lines.append('  last build end time: %s (roughly %f hours ago)\n' %
                 (formatTime(end_time), bot.getHoursSinceLastRun()))

  if bot.bot_url is not None:
    lines.append('  bot url: %s\n' % bot.bot_url)

  if bot.build_url is not None:
    lines.append('  build url: %s\n' % bot.build_url)

  lines.append('\n')
  return ''.join(lines)
322
def getBotsStr(bots):
  """Return the getBotStr() descriptions of |bots| followed by a newline."""
  descriptions = [getBotStr(bot) for bot in bots]
  return ''.join(descriptions) + '\n'
331
def getOfflineBotsStr(offline_bots):
  """Return the 'Offline bots:' report section for |offline_bots|."""
  listing = getBotsStr(offline_bots)
  return 'Offline bots:\n%s' % listing
334
def getFailedBotsStr(failed_bots):
  """Return the 'Failed bots:' report section for |failed_bots|."""
  listing = getBotsStr(failed_bots)
  return 'Failed bots:\n%s' % listing
337
def getBotDicts(bots):
  """Return the toDict() form of every bot in |bots|, in the same order."""
  return [bot.toDict() for bot in bots]
345
def unserializeTime(t):
  """Rebuild a time.struct_time from a dict produced by serialTime().

  Args:
    t: dict with integer 'year', 'mon', 'day', 'hour', 'min' and 'sec' entries.

  Returns:
    A time.struct_time whose weekday and day-of-year are derived from the
    date (so '%a' formats correctly) and whose tm_isdst is -1 (unknown),
    since the serialized form does not carry that information.

  Raises:
    ValueError: if the fields do not form a valid date/time.
  """
  # Going through datetime fills in tm_wday/tm_yday; hard-coding them to 0
  # made every restored time print as a Monday.
  restored = datetime.datetime(t['year'], t['mon'], t['day'],
                               t['hour'], t['min'], t['sec'])
  return restored.timetuple()
349
def serialTime(t):
  """Return the date and time-of-day fields of struct_time |t| as a dict."""
  fields = (('year', t.tm_year), ('mon', t.tm_mon), ('day', t.tm_mday),
            ('hour', t.tm_hour), ('min', t.tm_min), ('sec', t.tm_sec))
  return dict(fields)
353
def getSummary(offline_bots, failed_bots):
  """Return {'offline': [...], 'failed': [...]} holding each bot's toDict()."""
  return {
      'offline': [bot.toDict() for bot in offline_bots],
      'failed': [bot.toDict() for bot in failed_bots],
  }
358
def findBot(name, lst):
  """Return the first bot in |lst| whose bot_name equals |name|, else None."""
  matches = (candidate for candidate in lst if candidate.bot_name == name)
  return next(matches, None)
365
def _botKey(bot):
  """Return the (waterfall name, bot name) pair identifying |bot|."""
  return (bot.waterfall_name, bot.bot_name)


def _indexBotsByKey(bots):
  """Return {_botKey(bot): bot}, keeping the first bot seen for each key."""
  index = {}
  for bot in bots:
    index.setdefault(_botKey(bot), bot)
  return index


def _offlineAtLeast(bot, hours):
  """Return True if |bot| is known to have last run |hours| or more ago.

  A bot whose hours-since-last-run is None (no end time known) never counts.
  """
  since_last_run = bot.getHoursSinceLastRun()
  return since_last_run is not None and since_last_run >= hours


def getNoteworthyEvents(offline_bots, failed_bots, previous_results):
  """Compare the current bot state with the previous run's state.

  Bots are matched on waterfall name AND bot name, because the same builder
  name can appear on more than one waterfall (e.g. the WEBKIT_GPU_BOTS names).

  Args:
    offline_bots: bots currently offline.
    failed_bots: bots currently failing.
    previous_results: dict with optional 'offline' and 'failed' lists of bots
        from the previous run; a missing key means an empty list.

  Returns:
    Dict with four lists of bots:
      'offline': offline for at least CRITICAL_NUM_HOURS now, and either not
          offline previously or previously below that threshold.
      'failed': failing now but not previously.
      'recovered_failures': failing previously but not now.
      'recovered_offline': previously offline for at least CRITICAL_NUM_HOURS
          and no longer offline.
  """
  CRITICAL_NUM_HOURS = 1.0

  previous_offline = previous_results.get('offline', [])
  previous_failures = previous_results.get('failed', [])

  previously_offline = _indexBotsByKey(previous_offline)
  previously_failed = _indexBotsByKey(previous_failures)
  currently_offline = _indexBotsByKey(offline_bots)
  currently_failed = _indexBotsByKey(failed_bots)

  noteworthy_offline = []
  for bot in offline_bots:
    if not _offlineAtLeast(bot, CRITICAL_NUM_HOURS):
      continue

    previous_bot = previously_offline.get(_botKey(bot))

    # Only report the bot the first time it is seen past the threshold.
    if (previous_bot is None or
        not _offlineAtLeast(previous_bot, CRITICAL_NUM_HOURS)):
      noteworthy_offline.append(bot)

  noteworthy_new_failures = [
      bot for bot in failed_bots if _botKey(bot) not in previously_failed]

  noteworthy_new_offline_recoveries = [
      bot for bot in previous_offline
      if _offlineAtLeast(bot, CRITICAL_NUM_HOURS) and
      _botKey(bot) not in currently_offline]

  noteworthy_new_failure_recoveries = [
      bot for bot in previous_failures
      if _botKey(bot) not in currently_failed]

  return {'offline': noteworthy_offline, 'failed': noteworthy_new_failures,
          'recovered_failures': noteworthy_new_failure_recoveries,
          'recovered_offline': noteworthy_new_offline_recoveries}
410
def getNoteworthyStr(noteworthy_events):
  """Return the 'IMPORTANT ...' report sections for |noteworthy_events|.

  Each non-empty list among 'offline', 'failed', 'recovered_offline' and
  'recovered_failures' yields (in that order) a heading, the description of
  each bot and an empty line. Returns '' when all four lists are empty.
  """
  sections = (
      ('offline', 'IMPORTANT bots newly offline for over an hour:\n'),
      ('failed', 'IMPORTANT new failing bots:\n'),
      ('recovered_offline',
       'IMPORTANT newly recovered previously offline bots:\n'),
      ('recovered_failures', 'IMPORTANT newly recovered failing bots:\n'),
  )

  pieces = []
  for key, heading in sections:
    bots = noteworthy_events[key]
    if not bots:
      continue

    pieces.append(heading)
    pieces.extend([getBotStr(bot) for bot in bots])
    pieces.append('\n')

  return ''.join(pieces)
447
def dictsToBots(bots):
  """Convert the 'offline' and 'failed' dict lists of |bots| into GpuBots."""
  return {
      'offline': [GpuBot.fromDict(entry) for entry in bots['offline']],
      'failed': [GpuBot.fromDict(entry) for entry in bots['failed']],
  }
458
class GpuBotPoller:
  """Runs one check of the GPU bots, prints a report and optionally emails it.

  The summary of each run is written to a results file so that the next run
  can tell which problems are new and which bots have recovered.
  """
  DEFAULT_PREVIOUS_RESULTS_FILE = '.check_gpu_bots_previous_results'

  def __init__(self, emailer, send_email_for_recovered_offline_bots,
               send_email_for_recovered_failing_bots, send_email_on_error,
               previous_results_file):
    """Store the configuration.

    Args:
      emailer: object with a send_email(body) method, or None to never email.
      send_email_for_recovered_offline_bots: also email when a previously
          offline bot is no longer offline.
      send_email_for_recovered_failing_bots: also email when a previously
          failing bot is no longer failing.
      send_email_on_error: email the error text when a check raises.
      previous_results_file: path of the results file, or None for
          DEFAULT_PREVIOUS_RESULTS_FILE.
    """
    self.emailer = emailer

    self.send_email_for_recovered_offline_bots = \
        send_email_for_recovered_offline_bots

    self.send_email_for_recovered_failing_bots = \
        send_email_for_recovered_failing_bots

    self.send_email_on_error = send_email_on_error
    self.previous_results_file = previous_results_file

  def _resultsFilePath(self):
    """Return the configured results file path, or the default one."""
    if self.previous_results_file is not None:
      return self.previous_results_file

    return GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE

  def shouldEmail(self, noteworthy_events):
    """Return True if |noteworthy_events| warrants sending an email.

    New offline/failing bots always do; recoveries only when the matching
    option was enabled.
    """
    if noteworthy_events['offline'] or noteworthy_events['failed']:
      return True

    if (self.send_email_for_recovered_offline_bots and
        noteworthy_events['recovered_offline']):
      return True

    if (self.send_email_for_recovered_failing_bots and
        noteworthy_events['recovered_failures']):
      return True

    return False

  def writeResults(self, summary):
    """Overwrite the results file with |summary| encoded as JSON."""
    with open(self._resultsFilePath(), 'w') as f:
      f.write(json.dumps(summary))

  def getPreviousResults(self):
    """Return the bots stored by the previous run, or {} if there is no file."""
    results_file = self._resultsFilePath()

    if not os.path.isfile(results_file):
      return {}

    with open(results_file, 'r') as f:
      return dictsToBots(json.loads(f.read()))

  def checkBots(self):
    """Query the bots, print the report, save the summary and maybe email.

    Any exception raised while checking is printed (and emailed when
    send_email_on_error is set and an emailer is available) instead of being
    propagated, so a repeating caller keeps running.
    """
    time_str = 'Current time: %s\n\n' % (formatTime(time.localtime()))
    print(time_str)

    try:
      bots = Waterfall.getAllGpuBots()

      offline_bots = Waterfall.getOfflineBots(bots)
      offline_str = getOfflineBotsStr(offline_bots)
      print(offline_str)

      failed_bots = Waterfall.getFailedBots(bots)
      failed_str = getFailedBotsStr(failed_bots)
      print(failed_str)

      previous_results = self.getPreviousResults()
      noteworthy_events = getNoteworthyEvents(offline_bots, failed_bots,
                                              previous_results)

      noteworthy_str = getNoteworthyStr(noteworthy_events)
      print(noteworthy_str)

      summary = getSummary(offline_bots, failed_bots)
      self.writeResults(summary)

      if self.emailer is not None and self.shouldEmail(noteworthy_events):
        self.emailer.send_email(Emailer.format_email_body(
            time_str, offline_str, failed_str, noteworthy_str))
    except Exception as e:
      error_str = 'Error: %s' % str(e)
      print(error_str)

      # The emailer is optional; without this check a poller built with
      # send_email_on_error but no emailer would raise from the handler.
      if self.send_email_on_error and self.emailer is not None:
        self.emailer.send_email(error_str)
544
def parseArgs(sys_args):
  """Parse and validate the command line.

  Args:
    sys_args: full argument vector; the first entry is used as program name.

  Returns:
    The argparse namespace. Inconsistent option combinations, or a given
    password file that does not exist, end the program through
    parser.error().
  """
  parser = argparse.ArgumentParser(
      prog=sys_args[0],
      description=('Query the Chromium GPU Bots Waterfall, output '
                   'potential problems, and optionally repeat automatically '
                   'and/or email notifications of results.'))

  parser.add_argument(
      '--repeat-delay', type=int, dest='repeat_delay', required=False,
      help='How often to automatically re-run the script, in minutes.')

  parser.add_argument(
      '--email-from', type=str, dest='email_from', required=False,
      help=("Email address to send from. Requires also specifying "
            "'--email-to'."))

  parser.add_argument(
      '--email-to', type=str, dest='email_to', required=False, nargs='+',
      help=("Email address(es) to send to. Requires also specifying "
            "'--email-from'"))

  parser.add_argument(
      '--send-email-for-recovered-offline-bots',
      dest='send_email_for_recovered_offline_bots', action='store_true',
      default=False,
      help=('Send an email out when a bot which has been offline for more '
            'than 1 hour goes back online.'))

  parser.add_argument(
      '--send-email-for-recovered-failing-bots',
      dest='send_email_for_recovered_failing_bots', action='store_true',
      default=False,
      help='Send an email when a failing bot recovers.')

  parser.add_argument(
      '--send-email-on-error',
      dest='send_email_on_error', action='store_true', default=False,
      help=('Send an email when the script has an error. For example, if '
            'the server is unreachable.'))

  password_file_help = (
      "File containing the plaintext password of the source email "
      "account. By default, '%s' will be tried. If it does not exist, "
      "you will be prompted. If you opt to store your password on disk "
      "in plaintext, use of a dummy account is strongly recommended.")
  parser.add_argument(
      '--email-password-file', dest='email_password_file', required=False,
      help=password_file_help % Emailer.DEFAULT_EMAIL_PASSWORD_FILE)

  results_file_help = (
      "File to store the results of the previous invocation of "
      "this script. By default, '%s' will be used.")
  parser.add_argument(
      '--previous-results-file', dest='previous_results_file', required=False,
      help=results_file_help % GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE)

  args = parser.parse_args(sys_args[1:])

  # parser.error() terminates the program, so at most one of the checks below
  # ever reports.
  no_sender = args.email_from is None

  if not no_sender and args.email_to is None:
    parser.error('--email-from requires --email-to.')

  if args.email_to is not None and no_sender:
    parser.error('--email-to requires --email-from.')

  if no_sender and args.send_email_for_recovered_offline_bots:
    parser.error('--send-email-for-recovered-offline-bots requires '
                 '--email-to and --email-from.')

  if no_sender and args.send_email_for_recovered_failing_bots:
    parser.error('--send-email-for-recovered-failing-bots '
                 'requires --email-to and --email-from.')

  if no_sender and args.send_email_on_error:
    parser.error('--send-email-on-error '
                 'requires --email-to and --email-from.')

  if (args.email_password_file and
      not os.path.isfile(args.email_password_file)):
    parser.error('File does not exist: %s' % args.email_password_file)

  return args
618
def main(sys_args):
  """Run the bot check once, or repeatedly when --repeat-delay was given.

  Args:
    sys_args: full argument vector, program name first.

  Returns:
    1 if email was requested but the test login failed, otherwise 0.
  """
  args = parseArgs(sys_args)

  emailer = None
  email_requested = args.email_from is not None and args.email_to is not None
  if email_requested:
    emailer = Emailer(args.email_from, args.email_to, args.email_password_file)

    # Fail fast on bad credentials instead of at the first notification.
    try:
      emailer.testEmailLogin()
    except Exception as e:
      print('Error logging into email account: %s' % str(e))
      return 1

  poller = GpuBotPoller(emailer,
                        args.send_email_for_recovered_offline_bots,
                        args.send_email_for_recovered_failing_bots,
                        args.send_email_on_error,
                        args.previous_results_file)

  poller.checkBots()

  while args.repeat_delay is not None:
    print('Will run again in %d minutes...\n' % args.repeat_delay)
    time.sleep(args.repeat_delay * 60)
    poller.checkBots()

  return 0
648
# Script entry point: the value returned by main() is the process exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv)
  sys.exit(exit_status)