src/tools/bisect-builds.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 """Snapshot Build Bisect Tool
   7
   8 This script bisects a snapshot archive using binary search. It starts at
   9 a bad revision (it will try to guess HEAD) and asks for a last known-good
  10 revision. It will then binary search across this revision range by downloading,
  11 unzipping, and opening Chromium for you. After testing the specific revision,
  12 it will ask you whether it is good or bad before continuing the search.
  13 """
  14
  15 # The base URL for stored build archives.
  16 CHROMIUM_BASE_URL = ('http://commondatastorage.googleapis.com'
  17                      '/chromium-browser-snapshots')
  18 WEBKIT_BASE_URL = ('http://commondatastorage.googleapis.com'
  19                    '/chromium-webkit-snapshots')
  20 ASAN_BASE_URL = ('http://commondatastorage.googleapis.com'
  21                  '/chromium-browser-asan')
  22
  23 # The base URL for official builds.
  24 OFFICIAL_BASE_URL = 'http://master.chrome.corp.google.com/official_builds'
  25
  26 # URL template for viewing changelogs between revisions.
  27 CHANGELOG_URL = ('http://build.chromium.org'
  28                  '/f/chromium/perf/dashboard/ui/changelog.html'
  29                  '?url=/trunk/src&range=%d%%3A%d')
  30
  31 # URL template for viewing changelogs between official versions.
  32 OFFICIAL_CHANGELOG_URL = ('http://omahaproxy.appspot.com/changelog'
  33                           '?old_version=%s&new_version=%s')
  34
  35 # DEPS file URL.
  36 DEPS_FILE = 'http://src.chromium.org/viewvc/chrome/trunk/src/DEPS?revision=%d'
  37
  38 # Blink changelogs URL.
  39 BLINK_CHANGELOG_URL = ('http://build.chromium.org'
  40                       '/f/chromium/perf/dashboard/ui/changelog_blink.html'
  41                       '?url=/trunk&range=%d%%3A%d')
  42
  43 DONE_MESSAGE_GOOD_MIN = ('You are probably looking for a change made after %s ('
  44                          'known good), but no later than %s (first known bad).')
  45 DONE_MESSAGE_GOOD_MAX = ('You are probably looking for a change made after %s ('
  46                          'known bad), but no later than %s (first known good).')
  47
  48 CHROMIUM_GITHASH_TO_SVN_URL = (
  49     'https://chromium.googlesource.com/chromium/src/+/%s?format=json')
  50
  51 BLINK_GITHASH_TO_SVN_URL = (
  52     'https://chromium.googlesource.com/chromium/blink/+/%s?format=json')
  53
  54 GITHASH_TO_SVN_URL = {
  55     'chromium': CHROMIUM_GITHASH_TO_SVN_URL,
  56     'blink': BLINK_GITHASH_TO_SVN_URL,
  57 }
  58
  59 # Search pattern to be matched in the JSON output from
  60 # CHROMIUM_GITHASH_TO_SVN_URL to get the chromium revision (svn revision).
  61 CHROMIUM_SEARCH_PATTERN = (
  62     r'.*git-svn-id: svn://svn.chromium.org/chrome/trunk/src@(\d+) ')
  63
  64 # Search pattern to be matched in the json output from
  65 # BLINK_GITHASH_TO_SVN_URL to get the blink revision (svn revision).
  66 BLINK_SEARCH_PATTERN = (
  67     r'.*git-svn-id: svn://svn.chromium.org/blink/trunk@(\d+) ')
  68
  69 SEARCH_PATTERN = {
  70     'chromium': CHROMIUM_SEARCH_PATTERN,
  71     'blink': BLINK_SEARCH_PATTERN,
  72 }
  73
  74 ###############################################################################
  75
  76 import json
  77 import optparse
  78 import os
  79 import re
  80 import shlex
  81 import shutil
  82 import subprocess
  83 import sys
  84 import tempfile
  85 import threading
  86 import urllib
  87 from distutils.version import LooseVersion
  88 from xml.etree import ElementTree
  89 import zipfile
  90
  91
  92 class PathContext(object):
  93   """A PathContext is used to carry the information used to construct URLs and
  94   paths when dealing with the storage server and archives."""
  95   def __init__(self, base_url, platform, good_revision, bad_revision,
  96                is_official, is_asan, use_local_repo, flash_path = None,
  97                pdf_path = None):
  98     super(PathContext, self).__init__()
  99     # Store off the input parameters.
 100     self.base_url = base_url
 101     self.platform = platform  # What's passed in to the '-a/--archive' option.
 102     self.good_revision = good_revision
 103     self.bad_revision = bad_revision
 104     self.is_official = is_official
 105     self.is_asan = is_asan
 106     self.build_type = 'release'
 107     self.flash_path = flash_path
 108     # Dictionary which stores svn revision number as key and it's
 109     # corresponding git hash as value. This data is populated in
 110     # _FetchAndParse and used later in GetDownloadURL while downloading
 111     # the build.
 112     self.githash_svn_dict = {}
 113     self.pdf_path = pdf_path
 114
 115     # The name of the ZIP file in a revision directory on the server.
 116     self.archive_name = None
 117
 118     # If the script is run from a local Chromium checkout,
 119     # "--use-local-repo" option can be used to make the script run faster.
 120     # It uses "git svn find-rev <SHA1>" command to convert git hash to svn
 121     # revision number.
 122     self.use_local_repo = use_local_repo
 123
 124     # Set some internal members:
 125     #   _listing_platform_dir = Directory that holds revisions. Ends with a '/'.
 126     #   _archive_extract_dir = Uncompressed directory in the archive_name file.
 127     #   _binary_name = The name of the executable to run.
 128     if self.platform in ('linux', 'linux64', 'linux-arm'):
 129       self._binary_name = 'chrome'
 130     elif self.platform == 'mac':
 131       self.archive_name = 'chrome-mac.zip'
 132       self._archive_extract_dir = 'chrome-mac'
 133     elif self.platform == 'win':
 134       self.archive_name = 'chrome-win32.zip'
 135       self._archive_extract_dir = 'chrome-win32'
 136       self._binary_name = 'chrome.exe'
 137     else:
 138       raise Exception('Invalid platform: %s' % self.platform)
 139
 140     if is_official:
 141       if self.platform == 'linux':
 142         self._listing_platform_dir = 'precise32bit/'
 143         self.archive_name = 'chrome-precise32bit.zip'
 144         self._archive_extract_dir = 'chrome-precise32bit'
 145       elif self.platform == 'linux64':
 146         self._listing_platform_dir = 'precise64bit/'
 147         self.archive_name = 'chrome-precise64bit.zip'
 148         self._archive_extract_dir = 'chrome-precise64bit'
 149       elif self.platform == 'mac':
 150         self._listing_platform_dir = 'mac/'
 151         self._binary_name = 'Google Chrome.app/Contents/MacOS/Google Chrome'
 152       elif self.platform == 'win':
 153         self._listing_platform_dir = 'win/'
 154     else:
 155       if self.platform in ('linux', 'linux64', 'linux-arm'):
 156         self.archive_name = 'chrome-linux.zip'
 157         self._archive_extract_dir = 'chrome-linux'
 158         if self.platform == 'linux':
 159           self._listing_platform_dir = 'Linux/'
 160         elif self.platform == 'linux64':
 161           self._listing_platform_dir = 'Linux_x64/'
 162         elif self.platform == 'linux-arm':
 163           self._listing_platform_dir = 'Linux_ARM_Cross-Compile/'
 164       elif self.platform == 'mac':
 165         self._listing_platform_dir = 'Mac/'
 166         self._binary_name = 'Chromium.app/Contents/MacOS/Chromium'
 167       elif self.platform == 'win':
 168         self._listing_platform_dir = 'Win/'
 169
 170   def GetASANPlatformDir(self):
 171     """ASAN builds are in directories like "linux-release", or have filenames
 172     like "asan-win32-release-277079.zip". This aligns to our platform names
 173     except in the case of Windows where they use "win32" instead of "win"."""
 174     if self.platform == 'win':
 175       return 'win32'
 176     else:
 177       return self.platform
 178
 179   def GetListingURL(self, marker=None):
 180     """Returns the URL for a directory listing, with an optional marker."""
 181     marker_param = ''
 182     if marker:
 183       marker_param = '&marker=' + str(marker)
 184     if self.is_asan:
 185       prefix = '%s-%s' % (self.GetASANPlatformDir(), self.build_type)
 186       return self.base_url + '/?delimiter=&prefix=' + prefix + marker_param
 187     else:
 188       return (self.base_url + '/?delimiter=/&prefix=' +
 189               self._listing_platform_dir + marker_param)
 190
 191   def GetDownloadURL(self, revision):
 192     """Gets the download URL for a build archive of a specific revision."""
 193     if self.is_asan:
 194       return '%s/%s-%s/%s-%d.zip' % (
 195           ASAN_BASE_URL, self.GetASANPlatformDir(), self.build_type,
 196           self.GetASANBaseName(), revision)
 197     if self.is_official:
 198       return '%s/%s/%s%s' % (
 199           OFFICIAL_BASE_URL, revision, self._listing_platform_dir,
 200           self.archive_name)
 201     else:
 202       if str(revision) in self.githash_svn_dict:
 203         revision = self.githash_svn_dict[str(revision)]
 204       return '%s/%s%s/%s' % (self.base_url, self._listing_platform_dir,
 205                              revision, self.archive_name)
 206
 207   def GetLastChangeURL(self):
 208     """Returns a URL to the LAST_CHANGE file."""
 209     return self.base_url + '/' + self._listing_platform_dir + 'LAST_CHANGE'
 210
 211   def GetASANBaseName(self):
 212     """Returns the base name of the ASAN zip file."""
 213     if 'linux' in self.platform:
 214       return 'asan-symbolized-%s-%s' % (self.GetASANPlatformDir(),
 215                                         self.build_type)
 216     else:
 217       return 'asan-%s-%s' % (self.GetASANPlatformDir(), self.build_type)
 218
 219   def GetLaunchPath(self, revision):
 220     """Returns a relative path (presumably from the archive extraction location)
 221     that is used to run the executable."""
 222     if self.is_asan:
 223       extract_dir = '%s-%d' % (self.GetASANBaseName(), revision)
 224     else:
 225       extract_dir = self._archive_extract_dir
 226     return os.path.join(extract_dir, self._binary_name)
 227
 228   def ParseDirectoryIndex(self):
 229     """Parses the Google Storage directory listing into a list of revision
 230     numbers."""
 231
 232     def _FetchAndParse(url):
 233       """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
 234       next-marker is not None, then the listing is a partial listing and another
 235       fetch should be performed with next-marker being the marker= GET
 236       parameter."""
 237       handle = urllib.urlopen(url)
 238       document = ElementTree.parse(handle)
 239
 240       # All nodes in the tree are namespaced. Get the root's tag name to extract
 241       # the namespace. Etree does namespaces as |{namespace}tag|.
 242       root_tag = document.getroot().tag
 243       end_ns_pos = root_tag.find('}')
 244       if end_ns_pos == -1:
 245         raise Exception('Could not locate end namespace for directory index')
 246       namespace = root_tag[:end_ns_pos + 1]
 247
 248       # Find the prefix (_listing_platform_dir) and whether or not the list is
 249       # truncated.
 250       prefix_len = len(document.find(namespace + 'Prefix').text)
 251       next_marker = None
 252       is_truncated = document.find(namespace + 'IsTruncated')
 253       if is_truncated is not None and is_truncated.text.lower() == 'true':
 254         next_marker = document.find(namespace + 'NextMarker').text
 255       # Get a list of all the revisions.
 256       revisions = []
 257       githash_svn_dict = {}
 258       if self.is_asan:
 259         asan_regex = re.compile(r'.*%s-(\d+)\.zip$' % (self.GetASANBaseName()))
 260         # Non ASAN builds are in a <revision> directory. The ASAN builds are
 261         # flat
 262         all_prefixes = document.findall(namespace + 'Contents/' +
 263                                         namespace + 'Key')
 264         for prefix in all_prefixes:
 265           m = asan_regex.match(prefix.text)
 266           if m:
 267             try:
 268               revisions.append(int(m.group(1)))
 269             except ValueError:
 270               pass
 271       else:
 272         all_prefixes = document.findall(namespace + 'CommonPrefixes/' +
 273                                         namespace + 'Prefix')
 274         # The <Prefix> nodes have content of the form of
 275         # |_listing_platform_dir/revision/|. Strip off the platform dir and the
 276         # trailing slash to just have a number.
 277         for prefix in all_prefixes:
 278           revnum = prefix.text[prefix_len:-1]
 279           try:
 280             if not revnum.isdigit():
 281               git_hash = revnum
 282               revnum = self.GetSVNRevisionFromGitHash(git_hash)
 283               githash_svn_dict[revnum] = git_hash
 284             if revnum is not None:
 285               revnum = int(revnum)
 286               revisions.append(revnum)
 287           except ValueError:
 288             pass
 289       return (revisions, next_marker, githash_svn_dict)
 290
 291     # Fetch the first list of revisions.
 292     (revisions, next_marker, self.githash_svn_dict) = _FetchAndParse(
 293         self.GetListingURL())
 294     # If the result list was truncated, refetch with the next marker. Do this
 295     # until an entire directory listing is done.
 296     while next_marker:
 297       next_url = self.GetListingURL(next_marker)
 298       (new_revisions, next_marker, new_dict) = _FetchAndParse(next_url)
 299       revisions.extend(new_revisions)
 300       self.githash_svn_dict.update(new_dict)
 301     return revisions
 302
 303   def _GetSVNRevisionFromGitHashWithoutGitCheckout(self, git_sha1, depot):
 304     json_url = GITHASH_TO_SVN_URL[depot] % git_sha1
 305     response = urllib.urlopen(json_url)
 306     if response.getcode() == 200:
 307       try:
 308         data = json.loads(response.read()[4:])
 309       except ValueError:
 310         print 'ValueError for JSON URL: %s' % json_url
 311         raise ValueError
 312     else:
 313       raise ValueError
 314     if 'message' in data:
 315       message = data['message'].split('\n')
 316       message = [line for line in message if line.strip()]
 317       search_pattern = re.compile(SEARCH_PATTERN[depot])
 318       result = search_pattern.search(message[len(message)-1])
 319       if result:
 320         return result.group(1)
 321     print 'Failed to get svn revision number for %s' % git_sha1
 322     raise ValueError
 323
 324   def _GetSVNRevisionFromGitHashFromGitCheckout(self, git_sha1, depot):
 325     def _RunGit(command, path):
 326       command = ['git'] + command
 327       if path:
 328         original_path = os.getcwd()
 329         os.chdir(path)
 330       shell = sys.platform.startswith('win')
 331       proc = subprocess.Popen(command, shell=shell, stdout=subprocess.PIPE,
 332                               stderr=subprocess.PIPE)
 333       (output, _) = proc.communicate()
 334
 335       if path:
 336         os.chdir(original_path)
 337       return (output, proc.returncode)
 338
 339     path = None
 340     if depot == 'blink':
 341       path = os.path.join(os.getcwd(), 'third_party', 'WebKit')
 342     if os.path.basename(os.getcwd()) == 'src':
 343       command = ['svn', 'find-rev', git_sha1]
 344       (git_output, return_code) = _RunGit(command, path)
 345       if not return_code:
 346         return git_output.strip('\n')
 347       raise ValueError
 348     else:
 349       print ('Script should be run from src folder. ' +
 350              'Eg: python tools/bisect-builds.py -g 280588 -b 280590' +
 351              '--archive linux64 --use-local-repo')
 352       sys.exit(1)
 353
 354   def GetSVNRevisionFromGitHash(self, git_sha1, depot='chromium'):
 355     if not self.use_local_repo:
 356       return self._GetSVNRevisionFromGitHashWithoutGitCheckout(git_sha1, depot)
 357     else:
 358       return self._GetSVNRevisionFromGitHashFromGitCheckout(git_sha1, depot)
 359
 360   def GetRevList(self):
 361     """Gets the list of revision numbers between self.good_revision and
 362     self.bad_revision."""
 363     # Download the revlist and filter for just the range between good and bad.
 364     minrev = min(self.good_revision, self.bad_revision)
 365     maxrev = max(self.good_revision, self.bad_revision)
 366     revlist_all = map(int, self.ParseDirectoryIndex())
 367
 368     revlist = [x for x in revlist_all if x >= int(minrev) and x <= int(maxrev)]
 369     revlist.sort()
 370
 371     # Set good and bad revisions to be legit revisions.
 372     if revlist:
 373       if self.good_revision < self.bad_revision:
 374         self.good_revision = revlist[0]
 375         self.bad_revision = revlist[-1]
 376       else:
 377         self.bad_revision = revlist[0]
 378         self.good_revision = revlist[-1]
 379
 380       # Fix chromium rev so that the deps blink revision matches REVISIONS file.
 381       if self.base_url == WEBKIT_BASE_URL:
 382         revlist_all.sort()
 383         self.good_revision = FixChromiumRevForBlink(revlist,
 384                                                     revlist_all,
 385                                                     self,
 386                                                     self.good_revision)
 387         self.bad_revision = FixChromiumRevForBlink(revlist,
 388                                                    revlist_all,
 389                                                    self,
 390                                                    self.bad_revision)
 391     return revlist
 392
 393   def GetOfficialBuildsList(self):
 394     """Gets the list of official build numbers between self.good_revision and
 395     self.bad_revision."""
 396     # Download the revlist and filter for just the range between good and bad.
 397     minrev = min(self.good_revision, self.bad_revision)
 398     maxrev = max(self.good_revision, self.bad_revision)
 399     handle = urllib.urlopen(OFFICIAL_BASE_URL)
 400     dirindex = handle.read()
 401     handle.close()
 402     build_numbers = re.findall(r'<a href="([0-9][0-9].*)/">', dirindex)
 403     final_list = []
 404     i = 0
 405     parsed_build_numbers = [LooseVersion(x) for x in build_numbers]
 406     for build_number in sorted(parsed_build_numbers):
 407       path = (OFFICIAL_BASE_URL + '/' + str(build_number) + '/' +
 408               self._listing_platform_dir + self.archive_name)
 409       i = i + 1
 410       try:
 411         connection = urllib.urlopen(path)
 412         connection.close()
 413         if build_number > maxrev:
 414           break
 415         if build_number >= minrev:
 416           final_list.append(str(build_number))
 417       except urllib.HTTPError:
 418         pass
 419     return final_list
 420
 421 def UnzipFilenameToDir(filename, directory):
 422   """Unzip |filename| to |directory|."""
 423   cwd = os.getcwd()
 424   if not os.path.isabs(filename):
 425     filename = os.path.join(cwd, filename)
 426   zf = zipfile.ZipFile(filename)
 427   # Make base.
 428   if not os.path.isdir(directory):
 429     os.mkdir(directory)
 430   os.chdir(directory)
 431   # Extract files.
 432   for info in zf.infolist():
 433     name = info.filename
 434     if name.endswith('/'):  # dir
 435       if not os.path.isdir(name):
 436         os.makedirs(name)
 437     else:  # file
 438       directory = os.path.dirname(name)
 439       if not os.path.isdir(directory):
 440         os.makedirs(directory)
 441       out = open(name, 'wb')
 442       out.write(zf.read(name))
 443       out.close()
 444     # Set permissions. Permission info in external_attr is shifted 16 bits.
 445     os.chmod(name, info.external_attr >> 16L)
 446   os.chdir(cwd)
 447
 448
 449 def FetchRevision(context, rev, filename, quit_event=None, progress_event=None):
 450   """Downloads and unzips revision |rev|.
 451   @param context A PathContext instance.
 452   @param rev The Chromium revision number/tag to download.
 453   @param filename The destination for the downloaded file.
 454   @param quit_event A threading.Event which will be set by the master thread to
 455                     indicate that the download should be aborted.
 456   @param progress_event A threading.Event which will be set by the master thread
 457                     to indicate that the progress of the download should be
 458                     displayed.
 459   """
 460   def ReportHook(blocknum, blocksize, totalsize):
 461     if quit_event and quit_event.isSet():
 462       raise RuntimeError('Aborting download of revision %s' % str(rev))
 463     if progress_event and progress_event.isSet():
 464       size = blocknum * blocksize
 465       if totalsize == -1:  # Total size not known.
 466         progress = 'Received %d bytes' % size
 467       else:
 468         size = min(totalsize, size)
 469         progress = 'Received %d of %d bytes, %.2f%%' % (
 470             size, totalsize, 100.0 * size / totalsize)
 471       # Send a \r to let all progress messages use just one line of output.
 472       sys.stdout.write('\r' + progress)
 473       sys.stdout.flush()
 474
 475   download_url = context.GetDownloadURL(rev)
 476   try:
 477     urllib.urlretrieve(download_url, filename, ReportHook)
 478     if progress_event and progress_event.isSet():
 479       print
 480   except RuntimeError:
 481     pass
 482
 483
 484 def RunRevision(context, revision, zip_file, profile, num_runs, command, args):
 485   """Given a zipped revision, unzip it and run the test."""
 486   print 'Trying revision %s...' % str(revision)
 487
 488   # Create a temp directory and unzip the revision into it.
 489   cwd = os.getcwd()
 490   tempdir = tempfile.mkdtemp(prefix='bisect_tmp')
 491   UnzipFilenameToDir(zip_file, tempdir)
 492   os.chdir(tempdir)
 493
 494   # Run the build as many times as specified.
 495   testargs = ['--user-data-dir=%s' % profile] + args
 496   # The sandbox must be run as root on Official Chrome, so bypass it.
 497   if ((context.is_official or context.flash_path or context.pdf_path) and
 498       context.platform.startswith('linux')):
 499     testargs.append('--no-sandbox')
 500   if context.flash_path:
 501     testargs.append('--ppapi-flash-path=%s' % context.flash_path)
 502     # We have to pass a large enough Flash version, which currently needs not
 503     # be correct. Instead of requiring the user of the script to figure out and
 504     # pass the correct version we just spoof it.
 505     testargs.append('--ppapi-flash-version=99.9.999.999')
 506
 507   # TODO(vitalybuka): Remove in the future. See crbug.com/395687.
 508   if context.pdf_path:
 509     shutil.copy(context.pdf_path,
 510                 os.path.dirname(context.GetLaunchPath(revision)))
 511     testargs.append('--enable-print-preview')
 512
 513   runcommand = []
 514   for token in shlex.split(command):
 515     if token == '%a':
 516       runcommand.extend(testargs)
 517     else:
 518       runcommand.append(
 519           token.replace('%p', os.path.abspath(context.GetLaunchPath(revision))).
 520           replace('%s', ' '.join(testargs)))
 521
 522   results = []
 523   for _ in range(num_runs):
 524     subproc = subprocess.Popen(runcommand,
 525                                bufsize=-1,
 526                                stdout=subprocess.PIPE,
 527                                stderr=subprocess.PIPE)
 528     (stdout, stderr) = subproc.communicate()
 529     results.append((subproc.returncode, stdout, stderr))
 530
 531   os.chdir(cwd)
 532   try:
 533     shutil.rmtree(tempdir, True)
 534   except Exception:
 535     pass
 536
 537   for (returncode, stdout, stderr) in results:
 538     if returncode:
 539       return (returncode, stdout, stderr)
 540   return results[0]
 541
 542
 543 # The arguments official_builds, status, stdout and stderr are unused.
 544 # They are present here because this function is passed to Bisect which then
 545 # calls it with 5 arguments.
 546 # pylint: disable=W0613
 547 def AskIsGoodBuild(rev, official_builds, status, stdout, stderr):
 548   """Asks the user whether build |rev| is good or bad."""
 549   # Loop until we get a response that we can parse.
 550   while True:
 551     response = raw_input('Revision %s is '
 552                          '[(g)ood/(b)ad/(r)etry/(u)nknown/(q)uit]: ' %
 553                          str(rev))
 554     if response and response in ('g', 'b', 'r', 'u'):
 555       return response
 556     if response and response == 'q':
 557       raise SystemExit()
 558
 559
 560 def IsGoodASANBuild(rev, official_builds, status, stdout, stderr):
 561   """Determine if an ASAN build |rev| is good or bad
 562
 563   Will examine stderr looking for the error message emitted by ASAN. If not
 564   found then will fallback to asking the user."""
 565   if stderr:
 566     bad_count = 0
 567     for line in stderr.splitlines():
 568       print line
 569       if line.find('ERROR: AddressSanitizer:') != -1:
 570         bad_count += 1
 571     if bad_count > 0:
 572       print 'Revision %d determined to be bad.' % rev
 573       return 'b'
 574   return AskIsGoodBuild(rev, official_builds, status, stdout, stderr)
 575
 576 class DownloadJob(object):
 577   """DownloadJob represents a task to download a given Chromium revision."""
 578
 579   def __init__(self, context, name, rev, zip_file):
 580     super(DownloadJob, self).__init__()
 581     # Store off the input parameters.
 582     self.context = context
 583     self.name = name
 584     self.rev = rev
 585     self.zip_file = zip_file
 586     self.quit_event = threading.Event()
 587     self.progress_event = threading.Event()
 588     self.thread = None
 589
 590   def Start(self):
 591     """Starts the download."""
 592     fetchargs = (self.context,
 593                  self.rev,
 594                  self.zip_file,
 595                  self.quit_event,
 596                  self.progress_event)
 597     self.thread = threading.Thread(target=FetchRevision,
 598                                    name=self.name,
 599                                    args=fetchargs)
 600     self.thread.start()
 601
 602   def Stop(self):
 603     """Stops the download which must have been started previously."""
 604     assert self.thread, 'DownloadJob must be started before Stop is called.'
 605     self.quit_event.set()
 606     self.thread.join()
 607     os.unlink(self.zip_file)
 608
 609   def WaitFor(self):
 610     """Prints a message and waits for the download to complete. The download
 611     must have been started previously."""
 612     assert self.thread, 'DownloadJob must be started before WaitFor is called.'
 613     print 'Downloading revision %s...' % str(self.rev)
 614     self.progress_event.set()  # Display progress of download.
 615     self.thread.join()
 616
 617
 618 def Bisect(context,
 619            num_runs=1,
 620            command='%p %a',
 621            try_args=(),
 622            profile=None,
 623            interactive=True,
 624            evaluate=AskIsGoodBuild):
 625   """Given known good and known bad revisions, run a binary search on all
 626   archived revisions to determine the last known good revision.
 627
 628   @param context PathContext object initialized with user provided parameters.
 629   @param num_runs Number of times to run each build for asking good/bad.
 630   @param try_args A tuple of arguments to pass to the test application.
 631   @param profile The name of the user profile to run with.
 632   @param interactive If it is false, use command exit code for good or bad
 633                      judgment of the argument build.
 634   @param evaluate A function which returns 'g' if the argument build is good,
 635                   'b' if it's bad or 'u' if unknown.
 636
 637   Threading is used to fetch Chromium revisions in the background, speeding up
 638   the user's experience. For example, suppose the bounds of the search are
 639   good_rev=0, bad_rev=100. The first revision to be checked is 50. Depending on
 640   whether revision 50 is good or bad, the next revision to check will be either
 641   25 or 75. So, while revision 50 is being checked, the script will download
 642   revisions 25 and 75 in the background. Once the good/bad verdict on rev 50 is
 643   known:
 644
 645     - If rev 50 is good, the download of rev 25 is cancelled, and the next test
 646       is run on rev 75.
 647
 648     - If rev 50 is bad, the download of rev 75 is cancelled, and the next test
 649       is run on rev 25.
 650   """
 651
 652   if not profile:
 653     profile = 'profile'
 654
 655   good_rev = context.good_revision
 656   bad_rev = context.bad_revision
 657   cwd = os.getcwd()
 658
 659   print 'Downloading list of known revisions...',
 660   if not context.use_local_repo:
 661     print '(use --use-local-repo for speed if you have a local checkout)'
 662   else:
 663     print
 664   _GetDownloadPath = lambda rev: os.path.join(cwd,
 665       '%s-%s' % (str(rev), context.archive_name))
 666   if context.is_official:
 667     revlist = context.GetOfficialBuildsList()
 668   else:
 669     revlist = context.GetRevList()
 670
 671   # Get a list of revisions to bisect across.
 672   if len(revlist) < 2:  # Don't have enough builds to bisect.
 673     msg = 'We don\'t have enough builds to bisect. revlist: %s' % revlist
 674     raise RuntimeError(msg)
 675
 676   # Figure out our bookends and first pivot point; fetch the pivot revision.
 677   minrev = 0
 678   maxrev = len(revlist) - 1
 679   pivot = maxrev / 2
 680   rev = revlist[pivot]
 681   zip_file = _GetDownloadPath(rev)
 682   fetch = DownloadJob(context, 'initial_fetch', rev, zip_file)
 683   fetch.Start()
 684   fetch.WaitFor()
 685
 686   # Binary search time!
 687   while fetch and fetch.zip_file and maxrev - minrev > 1:
 688     if bad_rev < good_rev:
 689       min_str, max_str = 'bad', 'good'
 690     else:
 691       min_str, max_str = 'good', 'bad'
 692     print 'Bisecting range [%s (%s), %s (%s)].' % (revlist[minrev], min_str,
 693                                                    revlist[maxrev], max_str)
 694
 695     # Pre-fetch next two possible pivots
 696     #   - down_pivot is the next revision to check if the current revision turns
 697     #     out to be bad.
 698     #   - up_pivot is the next revision to check if the current revision turns
 699     #     out to be good.
 700     down_pivot = int((pivot - minrev) / 2) + minrev
 701     down_fetch = None
 702     if down_pivot != pivot and down_pivot != minrev:
 703       down_rev = revlist[down_pivot]
 704       down_fetch = DownloadJob(context, 'down_fetch', down_rev,
 705                                _GetDownloadPath(down_rev))
 706       down_fetch.Start()
 707
 708     up_pivot = int((maxrev - pivot) / 2) + pivot
 709     up_fetch = None
 710     if up_pivot != pivot and up_pivot != maxrev:
 711       up_rev = revlist[up_pivot]
 712       up_fetch = DownloadJob(context, 'up_fetch', up_rev,
 713                              _GetDownloadPath(up_rev))
 714       up_fetch.Start()
 715
 716     # Run test on the pivot revision.
 717     status = None
 718     stdout = None
 719     stderr = None
 720     try:
 721       (status, stdout, stderr) = RunRevision(context,
 722                                              rev,
 723                                              fetch.zip_file,
 724                                              profile,
 725                                              num_runs,
 726                                              command,
 727                                              try_args)
 728     except Exception, e:
 729       print >> sys.stderr, e
 730
 731     # Call the evaluate function to see if the current revision is good or bad.
 732     # On that basis, kill one of the background downloads and complete the
 733     # other, as described in the comments above.
 734     try:
 735       if not interactive:
 736         if status:
 737           answer = 'b'
 738           print 'Bad revision: %s' % rev
 739         else:
 740           answer = 'g'
 741           print 'Good revision: %s' % rev
 742       else:
 743         answer = evaluate(rev, context.is_official, status, stdout, stderr)
 744       if ((answer == 'g' and good_rev < bad_rev)
 745           or (answer == 'b' and bad_rev < good_rev)):
 746         fetch.Stop()
 747         minrev = pivot
 748         if down_fetch:
 749           down_fetch.Stop()  # Kill the download of the older revision.
 750           fetch = None
 751         if up_fetch:
 752           up_fetch.WaitFor()
 753           pivot = up_pivot
 754           fetch = up_fetch
 755       elif ((answer == 'b' and good_rev < bad_rev)
 756             or (answer == 'g' and bad_rev < good_rev)):
 757         fetch.Stop()
 758         maxrev = pivot
 759         if up_fetch:
 760           up_fetch.Stop()  # Kill the download of the newer revision.
 761           fetch = None
 762         if down_fetch:
 763           down_fetch.WaitFor()
 764           pivot = down_pivot
 765           fetch = down_fetch
 766       elif answer == 'r':
 767         pass  # Retry requires no changes.
 768       elif answer == 'u':
 769         # Nuke the revision from the revlist and choose a new pivot.
 770         fetch.Stop()
 771         revlist.pop(pivot)
 772         maxrev -= 1  # Assumes maxrev >= pivot.
 773
 774         if maxrev - minrev > 1:
 775           # Alternate between using down_pivot or up_pivot for the new pivot
 776           # point, without affecting the range. Do this instead of setting the
 777           # pivot to the midpoint of the new range because adjacent revisions
 778           # are likely affected by the same issue that caused the (u)nknown
 779           # response.
 780           if up_fetch and down_fetch:
 781             fetch = [up_fetch, down_fetch][len(revlist) % 2]
 782           elif up_fetch:
 783             fetch = up_fetch
 784           else:
 785             fetch = down_fetch
 786           fetch.WaitFor()
 787           if fetch == up_fetch:
 788             pivot = up_pivot - 1  # Subtracts 1 because revlist was resized.
 789           else:
 790             pivot = down_pivot
 791           zip_file = fetch.zip_file
 792
 793         if down_fetch and fetch != down_fetch:
 794           down_fetch.Stop()
 795         if up_fetch and fetch != up_fetch:
 796           up_fetch.Stop()
 797       else:
 798         assert False, 'Unexpected return value from evaluate(): ' + answer
 799     except SystemExit:
 800       print 'Cleaning up...'
 801       for f in [_GetDownloadPath(revlist[down_pivot]),
 802                 _GetDownloadPath(revlist[up_pivot])]:
 803         try:
 804           os.unlink(f)
 805         except OSError:
 806           pass
 807       sys.exit(0)
 808
 809     rev = revlist[pivot]
 810
 811   return (revlist[minrev], revlist[maxrev], context)
 812
 813
 814 def GetBlinkDEPSRevisionForChromiumRevision(rev):
 815   """Returns the blink revision that was in REVISIONS file at
 816   chromium revision |rev|."""
 817   # . doesn't match newlines without re.DOTALL, so this is safe.
 818   blink_re = re.compile(r'webkit_revision\D*(\d+)')
 819   url = urllib.urlopen(DEPS_FILE % rev)
 820   m = blink_re.search(url.read())
 821   url.close()
 822   if m:
 823     return int(m.group(1))
 824   else:
 825     raise Exception('Could not get Blink revision for Chromium rev %d' % rev)
 826
 827
 828 def GetBlinkRevisionForChromiumRevision(context, rev):
 829   """Returns the blink revision that was in REVISIONS file at
 830   chromium revision |rev|."""
 831   def _IsRevisionNumber(revision):
 832     if isinstance(revision, int):
 833       return True
 834     else:
 835       return revision.isdigit()
 836   if str(rev) in context.githash_svn_dict:
 837     rev = context.githash_svn_dict[str(rev)]
 838   file_url = '%s/%s%s/REVISIONS' % (context.base_url,
 839                                     context._listing_platform_dir, rev)
 840   url = urllib.urlopen(file_url)
 841   if url.getcode() == 200:
 842     try:
 843       data = json.loads(url.read())
 844     except ValueError:
 845       print 'ValueError for JSON URL: %s' % file_url
 846       raise ValueError
 847   else:
 848     raise ValueError
 849   url.close()
 850   if 'webkit_revision' in data:
 851     blink_rev = data['webkit_revision']
 852     if not _IsRevisionNumber(blink_rev):
 853       blink_rev = int(context.GetSVNRevisionFromGitHash(blink_rev, 'blink'))
 854     return blink_rev
 855   else:
 856     raise Exception('Could not get blink revision for cr rev %d' % rev)
 857
 858
 859 def FixChromiumRevForBlink(revisions_final, revisions, self, rev):
 860   """Returns the chromium revision that has the correct blink revision
 861   for blink bisect, DEPS and REVISIONS file might not match since
 862   blink snapshots point to tip of tree blink.
 863   Note: The revisions_final variable might get modified to include
 864   additional revisions."""
 865   blink_deps_rev = GetBlinkDEPSRevisionForChromiumRevision(rev)
 866
 867   while (GetBlinkRevisionForChromiumRevision(self, rev) > blink_deps_rev):
 868     idx = revisions.index(rev)
 869     if idx > 0:
 870       rev = revisions[idx-1]
 871       if rev not in revisions_final:
 872         revisions_final.insert(0, rev)
 873
 874   revisions_final.sort()
 875   return rev
 876
 877
 878 def GetChromiumRevision(context, url):
 879   """Returns the chromium revision read from given URL."""
 880   try:
 881     # Location of the latest build revision number
 882     latest_revision = urllib.urlopen(url).read()
 883     if latest_revision.isdigit():
 884       return int(latest_revision)
 885     return context.GetSVNRevisionFromGitHash(latest_revision)
 886   except Exception:
 887     print 'Could not determine latest revision. This could be bad...'
 888     return 999999999
 889
 890
 891 def main():
 892   usage = ('%prog [options] [-- chromium-options]\n'
 893            'Perform binary search on the snapshot builds to find a minimal\n'
 894            'range of revisions where a behavior change happened. The\n'
 895            'behaviors are described as "good" and "bad".\n'
 896            'It is NOT assumed that the behavior of the later revision is\n'
 897            'the bad one.\n'
 898            '\n'
 899            'Revision numbers should use\n'
 900            '  Official versions (e.g. 1.0.1000.0) for official builds. (-o)\n'
 901            '  SVN revisions (e.g. 123456) for chromium builds, from trunk.\n'
 902            '    Use base_trunk_revision from http://omahaproxy.appspot.com/\n'
 903            '    for earlier revs.\n'
 904            '    Chrome\'s about: build number and omahaproxy branch_revision\n'
 905            '    are incorrect, they are from branches.\n'
 906            '\n'
 907            'Tip: add "-- --no-first-run" to bypass the first run prompts.')
 908   parser = optparse.OptionParser(usage=usage)
 909   # Strangely, the default help output doesn't include the choice list.
 910   choices = ['mac', 'win', 'linux', 'linux64', 'linux-arm']
 911             # linux-chromiumos lacks a continuous archive http://crbug.com/78158
 912   parser.add_option('-a', '--archive',
 913                     choices=choices,
 914                     help='The buildbot archive to bisect [%s].' %
 915                          '|'.join(choices))
 916   parser.add_option('-o',
 917                     action='store_true',
 918                     dest='official_builds',
 919                     help='Bisect across official Chrome builds (internal '
 920                          'only) instead of Chromium archives.')
 921   parser.add_option('-b', '--bad',
 922                     type='str',
 923                     help='A bad revision to start bisection. '
 924                          'May be earlier or later than the good revision. '
 925                          'Default is HEAD.')
 926   parser.add_option('-f', '--flash_path',
 927                     type='str',
 928                     help='Absolute path to a recent Adobe Pepper Flash '
 929                          'binary to be used in this bisection (e.g. '
 930                          'on Windows C:\...\pepflashplayer.dll and on Linux '
 931                          '/opt/google/chrome/PepperFlash/'
 932                          'libpepflashplayer.so).')
 933   parser.add_option('-d', '--pdf_path',
 934                     type='str',
 935                     help='Absolute path to a recent PDF plugin '
 936                          'binary to be used in this bisection (e.g. '
 937                          'on Windows C:\...\pdf.dll and on Linux '
 938                          '/opt/google/chrome/libpdf.so). Option also enables '
 939                          'print preview.')
 940   parser.add_option('-g', '--good',
 941                     type='str',
 942                     help='A good revision to start bisection. ' +
 943                          'May be earlier or later than the bad revision. ' +
 944                          'Default is 0.')
 945   parser.add_option('-p', '--profile', '--user-data-dir',
 946                     type='str',
 947                     default='profile',
 948                     help='Profile to use; this will not reset every run. '
 949                          'Defaults to a clean profile.')
 950   parser.add_option('-t', '--times',
 951                     type='int',
 952                     default=1,
 953                     help='Number of times to run each build before asking '
 954                          'if it\'s good or bad. Temporary profiles are reused.')
 955   parser.add_option('-c', '--command',
 956                     type='str',
 957                     default='%p %a',
 958                     help='Command to execute. %p and %a refer to Chrome '
 959                          'executable and specified extra arguments '
 960                          'respectively. Use %s to specify all extra arguments '
 961                          'as one string. Defaults to "%p %a". Note that any '
 962                          'extra paths specified should be absolute.')
 963   parser.add_option('-l', '--blink',
 964                     action='store_true',
 965                     help='Use Blink bisect instead of Chromium. ')
 966   parser.add_option('', '--not-interactive',
 967                     action='store_true',
 968                     default=False,
 969                     help='Use command exit code to tell good/bad revision.')
 970   parser.add_option('--asan',
 971                     dest='asan',
 972                     action='store_true',
 973                     default=False,
 974                     help='Allow the script to bisect ASAN builds')
 975   parser.add_option('--use-local-repo',
 976                     dest='use_local_repo',
 977                     action='store_true',
 978                     default=False,
 979                     help='Allow the script to convert git SHA1 to SVN '
 980                          'revision using "git svn find-rev <SHA1>" '
 981                          'command from a Chromium checkout.')
 982
 983   (opts, args) = parser.parse_args()
 984
 985   if opts.archive is None:
 986     print 'Error: missing required parameter: --archive'
 987     print
 988     parser.print_help()
 989     return 1
 990
 991   if opts.asan:
 992     supported_platforms = ['linux', 'mac', 'win']
 993     if opts.archive not in supported_platforms:
 994       print 'Error: ASAN bisecting only supported on these platforms: [%s].' % (
 995             '|'.join(supported_platforms))
 996       return 1
 997     if opts.official_builds:
 998       print 'Error: Do not yet support bisecting official ASAN builds.'
 999       return 1
1000
1001   if opts.asan:
1002     base_url = ASAN_BASE_URL
1003   elif opts.blink:
1004     base_url = WEBKIT_BASE_URL
1005   else:
1006     base_url = CHROMIUM_BASE_URL
1007
1008   # Create the context. Initialize 0 for the revisions as they are set below.
1009   context = PathContext(base_url, opts.archive, opts.good, opts.bad,
1010                         opts.official_builds, opts.asan, opts.use_local_repo,
1011                         opts.flash_path, opts.pdf_path)
1012   # Pick a starting point, try to get HEAD for this.
1013   if not opts.bad:
1014     context.bad_revision = '999.0.0.0'
1015     context.bad_revision = GetChromiumRevision(
1016         context, context.GetLastChangeURL())
1017
1018   # Find out when we were good.
1019   if not opts.good:
1020     context.good_revision = '0.0.0.0' if opts.official_builds else 0
1021
1022   if opts.flash_path:
1023     msg = 'Could not find Flash binary at %s' % opts.flash_path
1024     assert os.path.exists(opts.flash_path), msg
1025
1026   if opts.pdf_path:
1027     msg = 'Could not find PDF binary at %s' % opts.pdf_path
1028     assert os.path.exists(opts.pdf_path), msg
1029
1030   if opts.official_builds:
1031     context.good_revision = LooseVersion(context.good_revision)
1032     context.bad_revision = LooseVersion(context.bad_revision)
1033   else:
1034     context.good_revision = int(context.good_revision)
1035     context.bad_revision = int(context.bad_revision)
1036
1037   if opts.times < 1:
1038     print('Number of times to run (%d) must be greater than or equal to 1.' %
1039           opts.times)
1040     parser.print_help()
1041     return 1
1042
1043   if opts.asan:
1044     evaluator = IsGoodASANBuild
1045   else:
1046     evaluator = AskIsGoodBuild
1047
1048   # Save these revision numbers to compare when showing the changelog URL
1049   # after the bisect.
1050   good_rev = context.good_revision
1051   bad_rev = context.bad_revision
1052
1053   (min_chromium_rev, max_chromium_rev, context) = Bisect(
1054       context, opts.times, opts.command, args, opts.profile,
1055       not opts.not_interactive, evaluator)
1056
1057   # Get corresponding blink revisions.
1058   try:
1059     min_blink_rev = GetBlinkRevisionForChromiumRevision(context,
1060                                                         min_chromium_rev)
1061     max_blink_rev = GetBlinkRevisionForChromiumRevision(context,
1062                                                         max_chromium_rev)
1063   except Exception:
1064     # Silently ignore the failure.
1065     min_blink_rev, max_blink_rev = 0, 0
1066
1067   if opts.blink:
1068     # We're done. Let the user know the results in an official manner.
1069     if good_rev > bad_rev:
1070       print DONE_MESSAGE_GOOD_MAX % (str(min_blink_rev), str(max_blink_rev))
1071     else:
1072       print DONE_MESSAGE_GOOD_MIN % (str(min_blink_rev), str(max_blink_rev))
1073
1074     print 'BLINK CHANGELOG URL:'
1075     print '  ' + BLINK_CHANGELOG_URL % (max_blink_rev, min_blink_rev)
1076
1077   else:
1078     # We're done. Let the user know the results in an official manner.
1079     if good_rev > bad_rev:
1080       print DONE_MESSAGE_GOOD_MAX % (str(min_chromium_rev),
1081                                      str(max_chromium_rev))
1082     else:
1083       print DONE_MESSAGE_GOOD_MIN % (str(min_chromium_rev),
1084                                      str(max_chromium_rev))
1085     if min_blink_rev != max_blink_rev:
1086       print ('NOTE: There is a Blink roll in the range, '
1087              'you might also want to do a Blink bisect.')
1088
1089     print 'CHANGELOG URL:'
1090     if opts.official_builds:
1091       print OFFICIAL_CHANGELOG_URL % (min_chromium_rev, max_chromium_rev)
1092     else:
1093       print '  ' + CHANGELOG_URL % (min_chromium_rev, max_chromium_rev)
1094
1095
if __name__ == '__main__':
  # Run only when executed as a script; main()'s return value becomes the
  # process exit status.
  raise SystemExit(main())