src/third_party/chromite/cbuildbot/stages/sync_stages.py

   1 # Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 """Module containing the sync stages."""
   6
   7 from __future__ import print_function
   8
   9 import contextlib
  10 import datetime
  11 import logging
  12 import os
  13 import sys
  14 from xml.etree import ElementTree
  15 from xml.dom import minidom
  16
  17 from chromite.cbuildbot import cbuildbot_config
  18 from chromite.cbuildbot import failures_lib
  19 from chromite.cbuildbot import constants
  20 from chromite.cbuildbot import lkgm_manager
  21 from chromite.cbuildbot import manifest_version
  22 from chromite.cbuildbot import repository
  23 from chromite.cbuildbot import tree_status
  24 from chromite.cbuildbot import trybot_patch_pool
  25 from chromite.cbuildbot import validation_pool
  26 from chromite.cbuildbot.stages import generic_stages
  27 from chromite.cbuildbot.stages import build_stages
  28 from chromite.lib import commandline
  29 from chromite.lib import cros_build_lib
  30 from chromite.lib import git
  31 from chromite.lib import osutils
  32 from chromite.lib import patch as cros_patch
  33 from chromite.scripts import cros_mark_chrome_as_stable
  34
  35
# Module-level shorthand for validation_pool.PRE_CQ; it is the key passed to
# GetCLStatus() when looking up a change's Pre-CQ status.
PRE_CQ = validation_pool.PRE_CQ
  37
  38
  39 class PatchChangesStage(generic_stages.BuilderStage):
  40   """Stage that patches a set of Gerrit changes to the buildroot source tree."""
  41
  42   def __init__(self, builder_run, patch_pool, **kwargs):
  43     """Construct a PatchChangesStage.
  44
  45     Args:
  46       builder_run: BuilderRun object.
  47       patch_pool: A TrybotPatchPool object containing the different types of
  48                   patches to apply.
  49     """
  50     super(PatchChangesStage, self).__init__(builder_run, **kwargs)
  51     self.patch_pool = patch_pool
  52
  53   @staticmethod
  54   def _CheckForDuplicatePatches(_series, changes):
  55     conflicts = {}
  56     duplicates = []
  57     for change in changes:
  58       if change.id is None:
  59         cros_build_lib.Warning(
  60             "Change %s lacks a usable ChangeId; duplicate checking cannot "
  61             "be done for this change.  If cherry-picking fails, this is a "
  62             "potential cause.", change)
  63         continue
  64       conflicts.setdefault(change.id, []).append(change)
  65
  66     duplicates = [x for x in conflicts.itervalues() if len(x) > 1]
  67     if not duplicates:
  68       return changes
  69
  70     for conflict in duplicates:
  71       cros_build_lib.Error(
  72           "Changes %s conflict with each other- they have same id %s.",
  73           ', '.join(map(str, conflict)), conflict[0].id)
  74
  75     cros_build_lib.Die("Duplicate patches were encountered: %s", duplicates)
  76
  77   def _PatchSeriesFilter(self, series, changes):
  78     return self._CheckForDuplicatePatches(series, changes)
  79
  80   def _ApplyPatchSeries(self, series, patch_pool, **kwargs):
  81     """Applies a patch pool using a patch series."""
  82     kwargs.setdefault('frozen', False)
  83     # Honor the given ordering, so that if a gerrit/remote patch
  84     # conflicts w/ a local patch, the gerrit/remote patch are
  85     # blamed rather than local (patch ordering is typically
  86     # local, gerrit, then remote).
  87     kwargs.setdefault('honor_ordering', True)
  88     kwargs['changes_filter'] = self._PatchSeriesFilter
  89
  90     _applied, failed_tot, failed_inflight = series.Apply(
  91         list(patch_pool), **kwargs)
  92
  93     failures = failed_tot + failed_inflight
  94     if failures:
  95       self.HandleApplyFailures(failures)
  96
  97   def HandleApplyFailures(self, failures):
  98     cros_build_lib.Die("Failed applying patches: %s",
  99                        "\n".join(map(str, failures)))
 100
 101   def PerformStage(self):
 102     class NoisyPatchSeries(validation_pool.PatchSeries):
 103       """Custom PatchSeries that adds links to buildbot logs for remote trys."""
 104
 105       def ApplyChange(self, change):
 106         if isinstance(change, cros_patch.GerritPatch):
 107           cros_build_lib.PrintBuildbotLink(str(change), change.url)
 108         elif isinstance(change, cros_patch.UploadedLocalPatch):
 109           cros_build_lib.PrintBuildbotStepText(str(change))
 110
 111         return validation_pool.PatchSeries.ApplyChange(self, change)
 112
 113     # If we're an external builder, ignore internal patches.
 114     helper_pool = validation_pool.HelperPool.SimpleCreate(
 115         cros_internal=self._run.config.internal, cros=True)
 116
 117     # Limit our resolution to non-manifest patches.
 118     patch_series = NoisyPatchSeries(
 119         self._build_root,
 120         helper_pool=helper_pool,
 121         deps_filter_fn=lambda p: not trybot_patch_pool.ManifestFilter(p))
 122
 123     self._ApplyPatchSeries(patch_series, self.patch_pool)
 124
 125
 126 class BootstrapStage(PatchChangesStage):
 127   """Stage that patches a chromite repo and re-executes inside it.
 128
 129   Attributes:
 130     returncode - the returncode of the cbuildbot re-execution.  Valid after
 131                  calling stage.Run().
 132   """
 133   option_name = 'bootstrap'
 134
 135   def __init__(self, builder_run, chromite_patch_pool,
 136                manifest_patch_pool=None, **kwargs):
 137     super(BootstrapStage, self).__init__(
 138         builder_run, trybot_patch_pool.TrybotPatchPool(), **kwargs)
 139     self.chromite_patch_pool = chromite_patch_pool
 140     self.manifest_patch_pool = manifest_patch_pool
 141     self.returncode = None
 142
 143   def _ApplyManifestPatches(self, patch_pool):
 144     """Apply a pool of manifest patches to a temp manifest checkout.
 145
 146     Args:
 147       patch_pool: The pool to apply.
 148
 149     Returns:
 150       The path to the patched manifest checkout.
 151
 152     Raises:
 153       Exception, if the new patched manifest cannot be parsed.
 154     """
 155     checkout_dir = os.path.join(self.tempdir, 'manfest-checkout')
 156     repository.CloneGitRepo(checkout_dir,
 157                             self._run.config.manifest_repo_url)
 158
 159     patch_series = validation_pool.PatchSeries.WorkOnSingleRepo(
 160         checkout_dir, tracking_branch=self._run.manifest_branch)
 161
 162     self._ApplyPatchSeries(patch_series, patch_pool)
 163     # Create the branch that 'repo init -b <target_branch> -u <patched_repo>'
 164     # will look for.
 165     cmd = ['branch', '-f', self._run.manifest_branch,
 166            constants.PATCH_BRANCH]
 167     git.RunGit(checkout_dir, cmd)
 168
 169     # Verify that the patched manifest loads properly. Propagate any errors as
 170     # exceptions.
 171     manifest = os.path.join(checkout_dir, self._run.config.manifest)
 172     git.Manifest.Cached(manifest, manifest_include_dir=checkout_dir)
 173     return checkout_dir
 174
 175   @staticmethod
 176   def _FilterArgsForApi(parsed_args, api_minor):
 177     """Remove arguments that are introduced after an api version."""
 178     def filter_fn(passed_arg):
 179       return passed_arg.opt_inst.api_version <= api_minor
 180
 181     accepted, removed = commandline.FilteringParser.FilterArgs(
 182         parsed_args, filter_fn)
 183
 184     if removed:
 185       cros_build_lib.Warning('The following arguments were removed due to api: '
 186                              "'%s'" % ' '.join(removed))
 187     return accepted
 188
 189   @classmethod
 190   def FilterArgsForTargetCbuildbot(cls, buildroot, cbuildbot_path, options):
 191     _, minor = cros_build_lib.GetTargetChromiteApiVersion(buildroot)
 192     args = [cbuildbot_path]
 193     args.extend(options.build_targets)
 194     args.extend(cls._FilterArgsForApi(options.parsed_args, minor))
 195
 196     # Only pass down --cache-dir if it was specified. By default, we want
 197     # the cache dir to live in the root of each checkout, so this means that
 198     # each instance of cbuildbot needs to calculate the default separately.
 199     if minor >= 2 and options.cache_dir_specified:
 200       args += ['--cache-dir', options.cache_dir]
 201
 202     return args
 203
 204   def HandleApplyFailures(self, failures):
 205     """Handle the case where patches fail to apply."""
 206     if self._run.options.pre_cq or self._run.config.pre_cq:
 207       # Let the PreCQSync stage handle this failure. The PreCQSync stage will
 208       # comment on CLs with the appropriate message when they fail to apply.
 209       #
 210       # WARNING: For manifest patches, the Pre-CQ attempts to apply external
 211       # patches to the internal manifest, and this means we may flag a conflict
 212       # here even if the patch applies cleanly. TODO(davidjames): Fix this.
 213       cros_build_lib.PrintBuildbotStepWarnings()
 214       cros_build_lib.Error('Failed applying patches: %s',
 215                            '\n'.join(map(str, failures)))
 216     else:
 217       PatchChangesStage.HandleApplyFailures(self, failures)
 218
 219   #pylint: disable=E1101
 220   @osutils.TempDirDecorator
 221   def PerformStage(self):
 222     # The plan for the builders is to use master branch to bootstrap other
 223     # branches. Now, if we wanted to test patches for both the bootstrap code
 224     # (on master) and the branched chromite (say, R20), we need to filter the
 225     # patches by branch.
 226     filter_branch = self._run.manifest_branch
 227     if self._run.options.test_bootstrap:
 228       filter_branch = 'master'
 229
 230     chromite_dir = os.path.join(self.tempdir, 'chromite')
 231     reference_repo = os.path.join(constants.SOURCE_ROOT, 'chromite', '.git')
 232     repository.CloneGitRepo(chromite_dir, constants.CHROMITE_URL,
 233                             reference=reference_repo)
 234     git.RunGit(chromite_dir, ['checkout', filter_branch])
 235
 236     def BranchAndChromiteFilter(patch):
 237       return (trybot_patch_pool.BranchFilter(filter_branch, patch) and
 238               trybot_patch_pool.ChromiteFilter(patch))
 239
 240     patch_series = validation_pool.PatchSeries.WorkOnSingleRepo(
 241         chromite_dir, filter_branch,
 242         deps_filter_fn=BranchAndChromiteFilter)
 243
 244     filtered_pool = self.chromite_patch_pool.FilterBranch(filter_branch)
 245     if filtered_pool:
 246       self._ApplyPatchSeries(patch_series, filtered_pool)
 247
 248     cbuildbot_path = constants.PATH_TO_CBUILDBOT
 249     if not os.path.exists(os.path.join(self.tempdir, cbuildbot_path)):
 250       cbuildbot_path = 'chromite/cbuildbot/cbuildbot'
 251     # pylint: disable=W0212
 252     cmd = self.FilterArgsForTargetCbuildbot(self.tempdir, cbuildbot_path,
 253                                             self._run.options)
 254
 255     extra_params = ['--sourceroot=%s' % self._run.options.sourceroot]
 256     extra_params.extend(self._run.options.bootstrap_args)
 257     if self._run.options.test_bootstrap:
 258       # We don't want re-executed instance to see this.
 259       cmd = [a for a in cmd if a != '--test-bootstrap']
 260     else:
 261       # If we've already done the desired number of bootstraps, disable
 262       # bootstrapping for the next execution.  Also pass in the patched manifest
 263       # repository.
 264       extra_params.append('--nobootstrap')
 265       if self.manifest_patch_pool:
 266         manifest_dir = self._ApplyManifestPatches(self.manifest_patch_pool)
 267         extra_params.extend(['--manifest-repo-url', manifest_dir])
 268
 269     cmd += extra_params
 270     result_obj = cros_build_lib.RunCommand(
 271         cmd, cwd=self.tempdir, kill_timeout=30, error_code_ok=True)
 272     self.returncode = result_obj.returncode
 273
 274
 275 class SyncStage(generic_stages.BuilderStage):
 276   """Stage that performs syncing for the builder."""
 277
 278   option_name = 'sync'
 279   output_manifest_sha1 = True
 280
 281   def __init__(self, builder_run, **kwargs):
 282     super(SyncStage, self).__init__(builder_run, **kwargs)
 283     self.repo = None
 284     self.skip_sync = False
 285
 286     # TODO(mtennant): Why keep a duplicate copy of this config value
 287     # at self.internal when it can always be retrieved from config?
 288     self.internal = self._run.config.internal
 289
 290   def _GetManifestVersionsRepoUrl(self, read_only=False):
 291     return cbuildbot_config.GetManifestVersionsRepoUrl(
 292         self.internal,
 293         read_only=read_only)
 294
 295   def Initialize(self):
 296     self._InitializeRepo()
 297
 298   def _InitializeRepo(self):
 299     """Set up the RepoRepository object."""
 300     self.repo = self.GetRepoRepository()
 301
 302   def GetNextManifest(self):
 303     """Returns the manifest to use."""
 304     return self._run.config.manifest
 305
 306   def ManifestCheckout(self, next_manifest):
 307     """Checks out the repository to the given manifest."""
 308     self._Print('\n'.join(['BUILDROOT: %s' % self.repo.directory,
 309                            'TRACKING BRANCH: %s' % self.repo.branch,
 310                            'NEXT MANIFEST: %s' % next_manifest]))
 311
 312     if not self.skip_sync:
 313       self.repo.Sync(next_manifest)
 314
 315     print(self.repo.ExportManifest(mark_revision=self.output_manifest_sha1),
 316           file=sys.stderr)
 317
 318   def RunPrePatchBuild(self):
 319     """Run through a pre-patch build to prepare for incremental build.
 320
 321     This function runs though the InitSDKStage, SetupBoardStage, and
 322     BuildPackagesStage. It is intended to be called before applying
 323     any patches under test, to prepare the chroot and sysroot in a state
 324     corresponding to ToT prior to an incremental build.
 325
 326     Returns:
 327       True if all stages were successful, False if any of them failed.
 328     """
 329     suffix = ' (pre-Patch)'
 330     try:
 331       build_stages.InitSDKStage(
 332           self._run, chroot_replace=True, suffix=suffix).Run()
 333       for builder_run in self._run.GetUngroupedBuilderRuns():
 334         for board in builder_run.config.boards:
 335           build_stages.SetupBoardStage(
 336               builder_run, board=board, suffix=suffix).Run()
 337           build_stages.BuildPackagesStage(
 338               builder_run, board=board, suffix=suffix).Run()
 339     except failures_lib.StepFailure:
 340       return False
 341
 342     return True
 343
 344   @failures_lib.SetFailureType(failures_lib.InfrastructureFailure)
 345   def PerformStage(self):
 346     self.Initialize()
 347     with osutils.TempDir() as tempdir:
 348       # Save off the last manifest.
 349       fresh_sync = True
 350       if os.path.exists(self.repo.directory) and not self._run.options.clobber:
 351         old_filename = os.path.join(tempdir, 'old.xml')
 352         try:
 353           old_contents = self.repo.ExportManifest()
 354         except cros_build_lib.RunCommandError as e:
 355           cros_build_lib.Warning(str(e))
 356         else:
 357           osutils.WriteFile(old_filename, old_contents)
 358           fresh_sync = False
 359
 360       # Sync.
 361       self.ManifestCheckout(self.GetNextManifest())
 362
 363       # Print the blamelist.
 364       if fresh_sync:
 365         cros_build_lib.PrintBuildbotStepText('(From scratch)')
 366       elif self._run.options.buildbot:
 367         lkgm_manager.GenerateBlameList(self.repo, old_filename)
 368
 369       # Incremental builds request an additional build before patching changes.
 370       if self._run.config.build_before_patching:
 371         pre_build_passed = self.RunPrePatchBuild()
 372         if not pre_build_passed:
 373           cros_build_lib.PrintBuildbotStepText('Pre-patch build failed.')
 374
 375
 376 class LKGMSyncStage(SyncStage):
 377   """Stage that syncs to the last known good manifest blessed by builders."""
 378
 379   output_manifest_sha1 = False
 380
 381   def GetNextManifest(self):
 382     """Override: Gets the LKGM."""
 383     # TODO(sosa):  Should really use an initialized manager here.
 384     if self.internal:
 385       mv_dir = 'manifest-versions-internal'
 386     else:
 387       mv_dir = 'manifest-versions'
 388
 389     manifest_path = os.path.join(self._build_root, mv_dir)
 390     manifest_repo = self._GetManifestVersionsRepoUrl(read_only=True)
 391     manifest_version.RefreshManifestCheckout(manifest_path, manifest_repo)
 392     return os.path.join(manifest_path, lkgm_manager.LKGMManager.LKGM_PATH)
 393
 394
 395 class ManifestVersionedSyncStage(SyncStage):
 396   """Stage that generates a unique manifest file, and sync's to it."""
 397
 398   # TODO(mtennant): Make this into a builder run value.
 399   output_manifest_sha1 = False
 400
 401   def __init__(self, builder_run, **kwargs):
 402     # Perform the sync at the end of the stage to the given manifest.
 403     super(ManifestVersionedSyncStage, self).__init__(builder_run, **kwargs)
 404     self.repo = None
 405     self.manifest_manager = None
 406
 407     # If a builder pushes changes (even with dryrun mode), we need a writable
 408     # repository. Otherwise, the push will be rejected by the server.
 409     self.manifest_repo = self._GetManifestVersionsRepoUrl(read_only=False)
 410
 411     # 1. If we're uprevving Chrome, Chrome might have changed even if the
 412     #    manifest has not, so we should force a build to double check. This
 413     #    means that we'll create a new manifest, even if there are no changes.
 414     # 2. If we're running with --debug, we should always run through to
 415     #    completion, so as to ensure a complete test.
 416     self._force = self._chrome_rev or self._run.options.debug
 417
 418   def HandleSkip(self):
 419     """Initializes a manifest manager to the specified version if skipped."""
 420     super(ManifestVersionedSyncStage, self).HandleSkip()
 421     if self._run.options.force_version:
 422       self.Initialize()
 423       self.ForceVersion(self._run.options.force_version)
 424
 425   def ForceVersion(self, version):
 426     """Creates a manifest manager from given version and returns manifest."""
 427     cros_build_lib.PrintBuildbotStepText(version)
 428     return self.manifest_manager.BootstrapFromVersion(version)
 429
 430   def VersionIncrementType(self):
 431     """Return which part of the version number should be incremented."""
 432     if self._run.manifest_branch == 'master':
 433       return 'build'
 434
 435     return 'branch'
 436
 437   def RegisterManifestManager(self, manifest_manager):
 438     """Save the given manifest manager for later use in this run.
 439
 440     Args:
 441       manifest_manager: Expected to be a BuildSpecsManager.
 442     """
 443     self._run.attrs.manifest_manager = self.manifest_manager = manifest_manager
 444
 445   def Initialize(self):
 446     """Initializes a manager that manages manifests for associated stages."""
 447
 448     dry_run = self._run.options.debug
 449
 450     self._InitializeRepo()
 451
 452     # If chrome_rev is somehow set, fail.
 453     assert not self._chrome_rev, \
 454         'chrome_rev is unsupported on release builders.'
 455
 456     self.RegisterManifestManager(manifest_version.BuildSpecsManager(
 457         source_repo=self.repo,
 458         manifest_repo=self.manifest_repo,
 459         manifest=self._run.config.manifest,
 460         build_names=self._run.GetBuilderIds(),
 461         incr_type=self.VersionIncrementType(),
 462         force=self._force,
 463         branch=self._run.manifest_branch,
 464         dry_run=dry_run,
 465         master=self._run.config.master))
 466
 467   def _SetChromeVersionIfApplicable(self, manifest):
 468     """If 'chrome' is in |manifest|, write the version to the BuilderRun object.
 469
 470     Args:
 471       manifest: Path to the manifest.
 472     """
 473     manifest_dom = minidom.parse(manifest)
 474     elements = manifest_dom.getElementsByTagName(lkgm_manager.CHROME_ELEMENT)
 475
 476     if elements:
 477       chrome_version = elements[0].getAttribute(
 478           lkgm_manager.CHROME_VERSION_ATTR)
 479       logging.info(
 480           'Chrome version was found in the manifest: %s', chrome_version)
 481       # Update the metadata dictionary. This is necessary because the
 482       # metadata dictionary is preserved through re-executions, so
 483       # SyncChromeStage can read the version from the dictionary
 484       # later. This is easier than parsing the manifest again after
 485       # the re-execution.
 486       self._run.attrs.metadata.UpdateKeyDictWithDict(
 487           'version', {'chrome': chrome_version})
 488
 489   def GetNextManifest(self):
 490     """Uses the initialized manifest manager to get the next manifest."""
 491     assert self.manifest_manager, \
 492         'Must run GetStageManager before checkout out build.'
 493
 494     build_id = self._run.attrs.metadata.GetDict().get('build_id')
 495
 496     to_return = self.manifest_manager.GetNextBuildSpec(
 497         dashboard_url=self.ConstructDashboardURL(),
 498         build_id=build_id)
 499     previous_version = self.manifest_manager.GetLatestPassingSpec()
 500     target_version = self.manifest_manager.current_version
 501
 502     # Print the Blamelist here.
 503     url_prefix = 'http://chromeos-images.corp.google.com/diff/report?'
 504     url = url_prefix + 'from=%s&to=%s' % (previous_version, target_version)
 505     cros_build_lib.PrintBuildbotLink('Blamelist', url)
 506     # The testManifestVersionedSyncOnePartBranch interacts badly with this
 507     # function.  It doesn't fully initialize self.manifest_manager which
 508     # causes target_version to be None.  Since there isn't a clean fix in
 509     # either direction, just throw this through str().  In the normal case,
 510     # it's already a string anyways.
 511     cros_build_lib.PrintBuildbotStepText(str(target_version))
 512
 513     return to_return
 514
 515   @contextlib.contextmanager
 516   def LocalizeManifest(self, manifest, filter_cros=False):
 517     """Remove restricted checkouts from the manifest if needed.
 518
 519     Args:
 520       manifest: The manifest to localize.
 521       filter_cros: If set, then only checkouts with a remote of 'cros' or
 522         'cros-internal' are kept, and the rest are filtered out.
 523     """
 524     if filter_cros:
 525       with osutils.TempDir() as tempdir:
 526         filtered_manifest = os.path.join(tempdir, 'filtered.xml')
 527         doc = ElementTree.parse(manifest)
 528         root = doc.getroot()
 529         for node in root.findall('project'):
 530           remote = node.attrib.get('remote')
 531           if remote and remote not in constants.GIT_REMOTES:
 532             root.remove(node)
 533         doc.write(filtered_manifest)
 534         yield filtered_manifest
 535     else:
 536       yield manifest
 537
 538   @failures_lib.SetFailureType(failures_lib.InfrastructureFailure)
 539   def PerformStage(self):
 540     self.Initialize()
 541     if self._run.options.force_version:
 542       next_manifest = self.ForceVersion(self._run.options.force_version)
 543     else:
 544       next_manifest = self.GetNextManifest()
 545
 546     if not next_manifest:
 547       cros_build_lib.Info('Found no work to do.')
 548       if self._run.attrs.manifest_manager.DidLastBuildFail():
 549         raise failures_lib.StepFailure('The previous build failed.')
 550       else:
 551         sys.exit(0)
 552
 553     # Log this early on for the release team to grep out before we finish.
 554     if self.manifest_manager:
 555       self._Print('\nRELEASETAG: %s\n' % (
 556           self.manifest_manager.current_version))
 557
 558     self._SetChromeVersionIfApplicable(next_manifest)
 559     # To keep local trybots working, remove restricted checkouts from the
 560     # official manifest we get from manifest-versions.
 561     with self.LocalizeManifest(
 562         next_manifest, filter_cros=self._run.options.local) as new_manifest:
 563       self.ManifestCheckout(new_manifest)
 564
 565
 566 class MasterSlaveLKGMSyncStage(ManifestVersionedSyncStage):
 567   """Stage that generates a unique manifest file candidate, and sync's to it.
 568
 569   This stage uses an LKGM manifest manager that handles LKGM
 570   candidates and their states.
 571   """
 572
 573   # Timeout for waiting on the latest candidate manifest.
 574   LATEST_CANDIDATE_TIMEOUT_SECONDS = 20 * 60
 575
 576   # TODO(mtennant): Turn this into self._run.attrs.sub_manager or similar.
 577   # An instance of lkgm_manager.LKGMManager for slave builds.
 578   sub_manager = None
 579
 580   def __init__(self, builder_run, **kwargs):
 581     super(MasterSlaveLKGMSyncStage, self).__init__(builder_run, **kwargs)
 582     # lkgm_manager deals with making sure we're synced to whatever manifest
 583     # we get back in GetNextManifest so syncing again is redundant.
 584     self.skip_sync = True
 585     self._chrome_version = None
 586
 587   def _GetInitializedManager(self, internal):
 588     """Returns an initialized lkgm manager.
 589
 590     Args:
 591       internal: Boolean.  True if this is using an internal manifest.
 592
 593     Returns:
 594       lkgm_manager.LKGMManager.
 595     """
 596     increment = self.VersionIncrementType()
 597     return lkgm_manager.LKGMManager(
 598         source_repo=self.repo,
 599         manifest_repo=cbuildbot_config.GetManifestVersionsRepoUrl(
 600             internal, read_only=False),
 601         manifest=self._run.config.manifest,
 602         build_names=self._run.GetBuilderIds(),
 603         build_type=self._run.config.build_type,
 604         incr_type=increment,
 605         force=self._force,
 606         branch=self._run.manifest_branch,
 607         dry_run=self._run.options.debug,
 608         master=self._run.config.master)
 609
 610   def Initialize(self):
 611     """Override: Creates an LKGMManager rather than a ManifestManager."""
 612     self._InitializeRepo()
 613     self.RegisterManifestManager(self._GetInitializedManager(self.internal))
 614     if (self._run.config.master and self._GetSlaveConfigs()):
 615       assert self.internal, 'Unified masters must use an internal checkout.'
 616       MasterSlaveLKGMSyncStage.sub_manager = self._GetInitializedManager(False)
 617
 618   def ForceVersion(self, version):
 619     manifest = super(MasterSlaveLKGMSyncStage, self).ForceVersion(version)
 620     if MasterSlaveLKGMSyncStage.sub_manager:
 621       MasterSlaveLKGMSyncStage.sub_manager.BootstrapFromVersion(version)
 622
 623     return manifest
 624
 625   def GetNextManifest(self):
 626     """Gets the next manifest using LKGM logic."""
 627     assert self.manifest_manager, \
 628         'Must run Initialize before we can get a manifest.'
 629     assert isinstance(self.manifest_manager, lkgm_manager.LKGMManager), \
 630         'Manifest manager instantiated with wrong class.'
 631
 632     if self._run.config.master:
 633       build_id = self._run.attrs.metadata.GetDict().get('build_id')
 634       manifest = self.manifest_manager.CreateNewCandidate(
 635           chrome_version=self._chrome_version,
 636           build_id=build_id)
 637       if MasterSlaveLKGMSyncStage.sub_manager:
 638         MasterSlaveLKGMSyncStage.sub_manager.CreateFromManifest(
 639             manifest, dashboard_url=self.ConstructDashboardURL())
 640       return manifest
 641     else:
 642       return self.manifest_manager.GetLatestCandidate(
 643           dashboard_url=self.ConstructDashboardURL(),
 644           timeout=self.LATEST_CANDIDATE_TIMEOUT_SECONDS)
 645
 646   def GetLatestChromeVersion(self):
 647     """Returns the version of Chrome to uprev."""
 648     return cros_mark_chrome_as_stable.GetLatestRelease(
 649         constants.CHROMIUM_GOB_URL)
 650
 651   @failures_lib.SetFailureType(failures_lib.InfrastructureFailure)
 652   def PerformStage(self):
 653     """Performs the stage."""
 654     if (self._chrome_rev == constants.CHROME_REV_LATEST and
 655         self._run.config.master):
 656       # PFQ master needs to determine what version of Chrome to build
 657       # for all slaves.
 658       self._chrome_version = self.GetLatestChromeVersion()
 659
 660     ManifestVersionedSyncStage.PerformStage(self)
 661
 662
 663 class CommitQueueSyncStage(MasterSlaveLKGMSyncStage):
 664   """Commit Queue Sync stage that handles syncing and applying patches.
 665
 666   Similar to the MasterSlaveLKGMsync Stage, this stage handles syncing
 667   to a manifest, passing around that manifest to other builders.
 668
 669   What makes this stage different is that the CQ master finds the
 670   patches on Gerrit which are ready to be committed, apply them, and
  671   includes the patches in the new manifest. The slaves sync to the
  672   manifest, and apply the patches written in the manifest.
 673   """
 674
 675   def __init__(self, builder_run, **kwargs):
 676     super(CommitQueueSyncStage, self).__init__(builder_run, **kwargs)
 677     # Figure out the builder's name from the buildbot waterfall.
 678     builder_name = self._run.config.paladin_builder_name
 679     self.builder_name = builder_name if builder_name else self._run.config.name
 680
 681     # The pool of patches to be picked up by the commit queue.
 682     # - For the master commit queue, it's initialized in GetNextManifest.
 683     # - For slave commit queues, it's initialized in _SetPoolFromManifest.
 684     #
 685     # In all cases, the pool is saved to disk.
 686     self.pool = None
 687
 688   def HandleSkip(self):
 689     """Handles skip and initializes validation pool from manifest."""
 690     super(CommitQueueSyncStage, self).HandleSkip()
 691     filename = self._run.options.validation_pool
 692     if filename:
 693       self.pool = validation_pool.ValidationPool.Load(filename,
 694           metadata=self._run.attrs.metadata)
 695     else:
 696       self._SetPoolFromManifest(self.manifest_manager.GetLocalManifest())
 697
 698   def _ChangeFilter(self, pool, changes, non_manifest_changes):
 699     # First, look for changes that were tested by the Pre-CQ.
 700     changes_to_test = []
 701     for change in changes:
 702       status = pool.GetCLStatus(PRE_CQ, change)
 703       if status == validation_pool.ValidationPool.STATUS_PASSED:
 704         changes_to_test.append(change)
 705
 706     # If we only see changes that weren't verified by Pre-CQ, try all of the
 707     # changes. This ensures that the CQ continues to work even if the Pre-CQ is
 708     # down.
 709     if not changes_to_test:
 710       changes_to_test = changes
 711
 712     return changes_to_test, non_manifest_changes
 713
 714   def _SetPoolFromManifest(self, manifest):
 715     """Sets validation pool based on manifest path passed in."""
 716     # Note that GetNextManifest() calls GetLatestCandidate() in this case,
 717     # so the repo will already be sync'd appropriately. This means that
 718     # AcquirePoolFromManifest does not need to sync.
 719     self.pool = validation_pool.ValidationPool.AcquirePoolFromManifest(
 720         manifest, self._run.config.overlays, self.repo,
 721         self._run.buildnumber, self.builder_name,
 722         self._run.config.master, self._run.options.debug,
 723         metadata=self._run.attrs.metadata)
 724
 725   def GetNextManifest(self):
 726     """Gets the next manifest using LKGM logic."""
 727     assert self.manifest_manager, \
 728         'Must run Initialize before we can get a manifest.'
 729     assert isinstance(self.manifest_manager, lkgm_manager.LKGMManager), \
 730         'Manifest manager instantiated with wrong class.'
 731
 732     build_id = self._run.attrs.metadata.GetDict().get('build_id')
 733
 734     if self._run.config.master:
 735       try:
 736         # In order to acquire a pool, we need an initialized buildroot.
 737         if not git.FindRepoDir(self.repo.directory):
 738           self.repo.Initialize()
 739         self.pool = pool = validation_pool.ValidationPool.AcquirePool(
 740             self._run.config.overlays, self.repo,
 741             self._run.buildnumber, self.builder_name,
 742             self._run.options.debug,
 743             check_tree_open=not self._run.options.debug or
 744                             self._run.options.mock_tree_status,
 745             changes_query=self._run.options.cq_gerrit_override,
 746             change_filter=self._ChangeFilter, throttled_ok=True,
 747             metadata=self._run.attrs.metadata)
 748
 749       except validation_pool.TreeIsClosedException as e:
 750         cros_build_lib.Warning(str(e))
 751         return None
 752
 753       manifest = self.manifest_manager.CreateNewCandidate(validation_pool=pool,
 754                                                           build_id=build_id)
 755       if MasterSlaveLKGMSyncStage.sub_manager:
 756         MasterSlaveLKGMSyncStage.sub_manager.CreateFromManifest(
 757             manifest, dashboard_url=self.ConstructDashboardURL(),
 758             build_id=build_id)
 759
 760       return manifest
 761     else:
 762       manifest = self.manifest_manager.GetLatestCandidate(
 763           dashboard_url=self.ConstructDashboardURL())
 764       if manifest:
 765         if self._run.config.build_before_patching:
 766           pre_build_passed = self.RunPrePatchBuild()
 767           cros_build_lib.PrintBuildbotStepName(
 768               'CommitQueueSync : Apply Patches')
 769           if not pre_build_passed:
 770             cros_build_lib.PrintBuildbotStepText('Pre-patch build failed.')
 771
 772         self._SetPoolFromManifest(manifest)
 773         self.pool.ApplyPoolIntoRepo()
 774
 775       return manifest
 776
 777   @failures_lib.SetFailureType(failures_lib.InfrastructureFailure)
 778   def PerformStage(self):
 779     """Performs normal stage and prints blamelist at end."""
 780     if self._run.options.force_version:
 781       self.HandleSkip()
 782     else:
 783       ManifestVersionedSyncStage.PerformStage(self)
 784
 785
 786 class PreCQSyncStage(SyncStage):
 787   """Sync and apply patches to test if they compile."""
 788
 789   def __init__(self, builder_run, patches, **kwargs):
 790     super(PreCQSyncStage, self).__init__(builder_run, **kwargs)
 791
 792     # The list of patches to test.
 793     self.patches = patches
 794
 795     # The ValidationPool of patches to test. Initialized in PerformStage, and
 796     # refreshed after bootstrapping by HandleSkip.
 797     self.pool = None
 798
 799   def HandleSkip(self):
 800     """Handles skip and loads validation pool from disk."""
 801     super(PreCQSyncStage, self).HandleSkip()
 802     filename = self._run.options.validation_pool
 803     if filename:
 804       self.pool = validation_pool.ValidationPool.Load(filename,
 805           metadata=self._run.attrs.metadata)
 806
 807   def PerformStage(self):
 808     super(PreCQSyncStage, self).PerformStage()
 809     self.pool = validation_pool.ValidationPool.AcquirePreCQPool(
 810         self._run.config.overlays, self._build_root,
 811         self._run.buildnumber, self._run.config.name,
 812         dryrun=self._run.options.debug_forced, changes=self.patches,
 813         metadata=self._run.attrs.metadata)
 814     self.pool.ApplyPoolIntoRepo()
 815
 816     if len(self.pool.changes) == 0:
 817       cros_build_lib.Die('No changes have been applied.')
 818
 819
class PreCQLauncherStage(SyncStage):
  """Scans for CLs and automatically launches Pre-CQ jobs to test them.

  The STATUS_* attributes below are aliases for the ValidationPool constants
  of the same name, so methods of this stage can refer to them via self.
  """

  # The CL is currently being tested by a Pre-CQ builder.
  STATUS_INFLIGHT = validation_pool.ValidationPool.STATUS_INFLIGHT

  # The CL has passed the Pre-CQ.
  STATUS_PASSED = validation_pool.ValidationPool.STATUS_PASSED

  # The CL has failed the Pre-CQ.
  STATUS_FAILED = validation_pool.ValidationPool.STATUS_FAILED

  # We have requested a Pre-CQ trybot but it has not started yet.
  STATUS_LAUNCHING = validation_pool.ValidationPool.STATUS_LAUNCHING

  # The CL is ready to be retried.
  STATUS_WAITING = validation_pool.ValidationPool.STATUS_WAITING

  # The CL has passed the Pre-CQ and is ready to be submitted.
  STATUS_READY_TO_SUBMIT = validation_pool.ValidationPool.STATUS_READY_TO_SUBMIT

  # The number of minutes we allow before considering a launch attempt failed.
  # If this window isn't hit in a given launcher run, the window will start
  # again from scratch in the next run. _HasLaunchTimedOut compares it against
  # the timestamps kept in self.launching.
  LAUNCH_DELAY = 90

  # The number of minutes we allow before considering an in-flight
  # job failed. If this window isn't hit in a given launcher run, the window
  # will start again from scratch in the next run. LaunchTrybot also passes
  # this value multiplied by 60 as the trybot's --timeout argument
  # (presumably seconds -- TODO confirm against cbuildbot's option parser).
  INFLIGHT_DELAY = 180

  # The maximum number of patches we will allow in a given trybot run. This is
  # needed because our trybot infrastructure can only handle so many patches at
  # once. Passed as max_txn_length when creating disjoint transactions.
  MAX_PATCHES_PER_TRYBOT_RUN = 50
 855
 856   def __init__(self, builder_run, **kwargs):
 857     super(PreCQLauncherStage, self).__init__(builder_run, **kwargs)
 858     self.skip_sync = True
 859     # Mapping from launching changes to the first known time when they
 860     # were launching.
 861     self.launching = {}
 862     # Mapping from inflight changes to the first known time when they
 863     # were inflight.
 864     self.inflight = {}
 865     self.retried = set()
 866
 867     self._build_id = self._run.attrs.metadata.GetValue('build_id')
 868
 869   def _HasLaunchTimedOut(self, change):
 870     """Check whether a given |change| has timed out on its trybot launch.
 871
 872     Assumes that the change is in the middle of being launched.
 873
 874     Returns:
 875       True if the change has timed out. False otherwise.
 876     """
 877     diff = datetime.timedelta(minutes=self.LAUNCH_DELAY)
 878     return datetime.datetime.now() - self.launching[change] > diff
 879
 880   def _HasInflightTimedOut(self, change):
 881     """Check whether a given |change| has timed out while trybot inflight.
 882
 883     Assumes that the change's trybot is inflight.
 884
 885     Returns:
 886       True if the change has timed out. False otherwise.
 887     """
 888     diff = datetime.timedelta(minutes=self.INFLIGHT_DELAY)
 889     return datetime.datetime.now() - self.inflight[change] > diff
 890
 891   @staticmethod
 892   def _PrintPatchStatus(patch, status):
 893     """Print a link to |patch| with |status| info."""
 894     items = (
 895         status,
 896         os.path.basename(patch.project),
 897         str(patch),
 898     )
 899     cros_build_lib.PrintBuildbotLink(' | '.join(items), patch.url)
 900
 901   def GetPreCQStatus(self, pool, changes, status_map):
 902     """Get the Pre-CQ status of a list of changes.
 903
 904     Side effect: reject or retry changes that have timed out.
 905
 906     Args:
 907       pool: The validation pool.
 908       changes: Changes to examine.
 909       status_map: Dict mapping changes to their CL status.
 910
 911     Returns:
 912       busy: The set of CLs that are currently being tested.
 913       passed: The set of CLs that have been verified.
 914     """
 915     busy, passed = set(), set()
 916
 917     for change in changes:
 918       status = status_map[change]
 919
 920       if status != self.STATUS_LAUNCHING:
 921         # The trybot is not launching, so we should remove it from our
 922         # launching timeout map.
 923         self.launching.pop(change, None)
 924
 925       if status != self.STATUS_INFLIGHT:
 926         # The trybot is not inflight, so we should remove it from our
 927         # inflight timeout map.
 928         self.inflight.pop(change, None)
 929
 930       if status == self.STATUS_LAUNCHING:
 931         # The trybot is in the process of launching.
 932         busy.add(change)
 933         if change not in self.launching:
 934           # Record the launch time of changes.
 935           self.launching[change] = datetime.datetime.now()
 936         elif self._HasLaunchTimedOut(change):
 937           if change in self.retried:
 938             msg = ('We were not able to launch a pre-cq trybot for your change.'
 939                    '\n\n'
 940                    'This problem can happen if the trybot waterfall is very '
 941                    'busy, or if there is an infrastructure issue. Please '
 942                    'notify the sheriff and mark your change as ready again. If '
 943                    'this problem occurs multiple times in a row, please file a '
 944                    'bug.')
 945
 946             pool.SendNotification(change, '%(details)s', details=msg)
 947             pool.RemoveCommitReady(change)
 948             pool.UpdateCLStatus(PRE_CQ, change, self.STATUS_FAILED,
 949                                 self._run.options.debug,
 950                                 build_id=self._build_id)
 951             self.retried.discard(change)
 952           else:
 953             # Try the change again.
 954             self.retried.add(change)
 955             pool.UpdateCLStatus(PRE_CQ, change, self.STATUS_WAITING,
 956                                 self._run.options.debug,
 957                                 build_id=self._build_id)
 958       elif status == self.STATUS_INFLIGHT:
 959         # Once a Pre-CQ run actually starts, it'll set the status to
 960         # STATUS_INFLIGHT.
 961         busy.add(change)
 962         if change not in self.inflight:
 963           # Record the inflight start time.
 964           self.inflight[change] = datetime.datetime.now()
 965         elif self._HasInflightTimedOut(change):
 966           msg = ('The pre-cq trybot for your change timed out after %s minutes.'
 967                  '\n\n'
 968                  'This problem can happen if your change causes the builder '
 969                  'to hang, or if there is some infrastructure issue. If your '
 970                  'change is not at fault you may mark your change as ready '
 971                  'again. If this problem occurs multiple times please notify '
 972                  'the sheriff and file a bug.' % self.INFLIGHT_DELAY)
 973
 974           pool.SendNotification(change, '%(details)s', details=msg)
 975           pool.RemoveCommitReady(change)
 976           pool.UpdateCLStatus(PRE_CQ, change, self.STATUS_FAILED,
 977                               self._run.options.debug,
 978                               build_id=self._build_id)
 979       elif status == self.STATUS_FAILED:
 980         # The Pre-CQ run failed for this change. It's possible that we got
 981         # unlucky and this change was just marked as 'Not Ready' by a bot. To
 982         # test this, mark the CL as 'waiting' for now. If the CL is still marked
 983         # as 'Ready' next time we check, we'll know the CL is truly still ready.
 984         busy.add(change)
 985         pool.UpdateCLStatus(PRE_CQ, change, self.STATUS_WAITING,
 986                             self._run.options.debug,
 987                             build_id=self._build_id)
 988         self._PrintPatchStatus(change, status)
 989       elif status == self.STATUS_PASSED:
 990         passed.add(change)
 991         self._PrintPatchStatus(change, status)
 992       elif status == self.STATUS_READY_TO_SUBMIT:
 993         passed.add(change)
 994         self._PrintPatchStatus(change, 'submitting')
 995
 996     return busy, passed
 997
 998   def LaunchTrybot(self, pool, plan):
 999     """Launch a Pre-CQ run with the provided list of CLs.
1000
1001     Args:
1002       pool: ValidationPool corresponding to |plan|.
1003       plan: The list of patches to test in the Pre-CQ run.
1004     """
1005     cmd = ['cbuildbot', '--remote', constants.PRE_CQ_BUILDER_NAME,
1006            '--timeout', str(self.INFLIGHT_DELAY * 60)]
1007     if self._run.options.debug:
1008       cmd.append('--debug')
1009     for patch in plan:
1010       cmd += ['-g', cros_patch.AddPrefix(patch, patch.gerrit_number)]
1011       self._PrintPatchStatus(patch, 'testing')
1012     cros_build_lib.RunCommand(cmd, cwd=self._build_root)
1013     for patch in plan:
1014       if pool.GetCLStatus(PRE_CQ, patch) != self.STATUS_PASSED:
1015         pool.UpdateCLStatus(PRE_CQ, patch, self.STATUS_LAUNCHING,
1016                             self._run.options.debug,
1017                             build_id=self._build_id)
1018
1019   def GetDisjointTransactionsToTest(self, pool, changes, status_map):
1020     """Get the list of disjoint transactions to test.
1021
1022     Side effect: reject or retry changes that have timed out.
1023
1024     Args:
1025       pool: The validation pool.
1026       changes: Changes to examine.
1027       status_map: Dict mapping changes to their CL status.
1028
1029     Returns:
1030       A list of disjoint transactions to test. Each transaction should be sent
1031       to a different Pre-CQ trybot.
1032     """
1033     busy, passed = self.GetPreCQStatus(pool, changes, status_map)
1034
1035     # Create a list of disjoint transactions to test.
1036     manifest = git.ManifestCheckout.Cached(self._build_root)
1037     plans = pool.CreateDisjointTransactions(
1038         manifest, max_txn_length=self.MAX_PATCHES_PER_TRYBOT_RUN)
1039     for plan in plans:
1040       # If any of the CLs in the plan are currently "busy" being tested,
1041       # wait until they're done before launching our trybot run. This helps
1042       # avoid race conditions.
1043       #
1044       # Similarly, if all of the CLs in the plan have already been validated,
1045       # there's no need to launch a trybot run.
1046       plan = set(plan)
1047       if plan.issubset(passed):
1048         logging.info('CLs already verified: %r', ' '.join(map(str, plan)))
1049       elif plan.intersection(busy):
1050         logging.info('CLs currently being verified: %r',
1051                      ' '.join(map(str, plan.intersection(busy))))
1052         if plan.difference(busy):
1053           logging.info('CLs waiting on verification of dependencies: %r',
1054               ' '.join(map(str, plan.difference(busy))))
1055       else:
1056         yield plan
1057
1058   def ProcessChanges(self, pool, changes, _non_manifest_changes):
1059     """Process a list of changes that were marked as Ready.
1060
1061     From our list of changes that were marked as Ready, we create a
1062     list of disjoint transactions and send each one to a separate Pre-CQ
1063     trybot.
1064
1065     Non-manifest changes are just submitted here because they don't need to be
1066     verified by either the Pre-CQ or CQ.
1067     """
1068     # Get change status.
1069     status_map = {}
1070     for change in changes:
1071       status = pool.GetCLStatus(PRE_CQ, change)
1072       status_map[change] = status
1073
1074     # Launch trybots for manifest changes.
1075     for plan in self.GetDisjointTransactionsToTest(pool, changes, status_map):
1076       self.LaunchTrybot(pool, plan)
1077
1078     # Submit changes that don't need a CQ run if we can.
1079     if tree_status.IsTreeOpen():
1080       pool.SubmitNonManifestChanges(check_tree_open=False)
1081       submitting = [change for (change, status) in status_map.items()
1082                     if status == self.STATUS_READY_TO_SUBMIT]
1083       pool.SubmitChanges(submitting, check_tree_open=False)
1084
1085     # Tell ValidationPool to keep waiting for more changes until we hit
1086     # its internal timeout.
1087     return [], []
1088
1089   @failures_lib.SetFailureType(failures_lib.InfrastructureFailure)
1090   def PerformStage(self):
1091     # Setup and initialize the repo.
1092     super(PreCQLauncherStage, self).PerformStage()
1093
1094     # Loop through all of the changes until we hit a timeout.
1095     validation_pool.ValidationPool.AcquirePool(
1096         self._run.config.overlays, self.repo,
1097         self._run.buildnumber,
1098         constants.PRE_CQ_LAUNCHER_NAME,
1099         dryrun=self._run.options.debug,
1100         changes_query=self._run.options.cq_gerrit_override,
1101         check_tree_open=False, change_filter=self.ProcessChanges,
1102         metadata=self._run.attrs.metadata)