2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """A git command for managing a local cache of git repositories."""
8 from __future__ import print_function
22 from download_from_google_storage import Gsutil
26 # Analogous to gc.autopacklimit git config.
29 GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'
32 # pylint: disable=E0602
35 class WinErr(Exception):
38 class LockError(Exception):
41 class RefsHeadsFailedToFetch(Exception):
44 class Lockfile(object):
45 """Class to represent a cross-platform process-specific lockfile."""
def __init__(self, path):
  """Bind this lock to `path` and remember the calling process.

  Stores the absolute form of `path`, the lockfile name derived from it by
  appending ".lock", and the pid of the current process.
  """
  self.pid = os.getpid()
  absolute = os.path.abspath(path)
  self.path = absolute
  self.lockfile = absolute + ".lock"
53 """Read the pid stored in the lockfile.
55 Note: This method is potentially racy. By the time it returns the lockfile
56 may have been unlocked, removed, or stolen by some other process.
59 with open(self.lockfile, 'r') as f:
60 pid = int(f.readline().strip())
61 except (IOError, ValueError):
def _make_lockfile(self):
  """Safely create a lockfile containing the current pid.

  The file is opened with O_CREAT | O_EXCL, so creation fails instead of
  overwriting a lockfile that already exists.

  Raises:
    OSError: if the lockfile already exists or cannot be created.
  """
  open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
  fd = os.open(self.lockfile, open_flags, 0o644)
  # The context manager flushes and closes the file even if the write fails,
  # so the pid reaches disk and the descriptor is never leaked.
  with os.fdopen(fd, 'w') as f:
    print(self.pid, file=f)
73 def _remove_lockfile(self):
74 """Delete the lockfile. Complains (implicitly) if it doesn't exist.
76 See gclient_utils.py:rmtree docstring for more explanation on the
79 if sys.platform == 'win32':
80 lockfile = os.path.normcase(self.lockfile)
82 exitcode = subprocess.call(['cmd.exe', '/c',
83 'del', '/f', '/q', lockfile])
87 raise LockError('Failed to remove lock: %s' % lockfile)
89 os.remove(self.lockfile)
94 Note: This is a NON-BLOCKING FAIL-FAST operation.
95 Do. Or do not. There is no try.
100 if e.errno == errno.EEXIST:
101 raise LockError("%s is already locked" % self.path)
103 raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))
106 """Release the lock."""
108 if not self.is_locked():
109 raise LockError("%s is not locked" % self.path)
110 if not self.i_am_locking():
111 raise LockError("%s is locked, but not by me" % self.path)
112 self._remove_lockfile()
114 # Windows is unreliable when it comes to file locking. YMMV.
117 def break_lock(self):
118 """Remove the lock, even if it was created by someone else."""
120 self._remove_lockfile()
122 except OSError as exc:
123 if exc.errno == errno.ENOENT:
129 """Test if the file is locked by anyone.
131 Note: This method is potentially racy. By the time it returns the lockfile
132 may have been unlocked, removed, or stolen by some other process.
134 return os.path.exists(self.lockfile)
def i_am_locking(self):
  """Test if the file is locked by this process.

  Returns a true value only when the lockfile exists and the pid stored in
  it equals the pid recorded on this object.
  """
  locked = self.is_locked()
  if not locked:
    return locked
  return self.pid == self._read_pid()
141 class Mirror(object):
143 git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
144 gsutil_exe = os.path.join(
145 os.path.dirname(os.path.abspath(__file__)),
146 'third_party', 'gsutil', 'gsutil')
147 cachepath_lock = threading.Lock()
149 def __init__(self, url, refs=None, print_func=None):
151 self.refs = refs or []
152 self.basedir = self.UrlToCacheDir(url)
153 self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
154 self.print = print_func or print
def bootstrap_bucket(self):
  """Return the bucket name used for this repo's bootstrap files.

  Urls containing 'chrome-internal' map to 'chrome-git-cache'; every other
  url maps to 'chromium-git-cache'.
  """
  is_internal = 'chrome-internal' in self.url
  return 'chrome-git-cache' if is_internal else 'chromium-git-cache'
def FromPath(cls, path):
  """Build an instance of `cls` for the repository cached at `path`.

  The url handed to the constructor is recovered from the cache directory
  name via cls.CacheDirToUrl().
  """
  url = cls.CacheDirToUrl(path)
  return cls(url)
def UrlToCacheDir(url):
  """Convert a git url to a normalized form for the cache dir path.

  The scheme is dropped, a trailing '.git' is removed, every '-' is doubled,
  every '/' becomes a single '-', and the result is lower-cased.
  """
  pieces = urlparse.urlparse(url)
  location = ''.join((pieces.netloc, pieces.path))
  suffix = '.git'
  if location.endswith(suffix):
    location = location[:len(location) - len(suffix)]
  # Escape literal dashes before '/' is turned into '-', so the two stay
  # distinguishable in the directory name.
  escaped = location.replace('-', '--')
  return escaped.replace('/', '-').lower()
def CacheDirToUrl(path):
  """Convert a cache dir path to its corresponding url.

  Only the final path component is used. A single '-' between word
  characters is turned back into '/', then each '--' is collapsed to '-',
  and the result is prefixed with 'https://'.

  Args:
    path: path of a cache directory; trailing path separators are ignored.

  Returns:
    The 'https://...' url string derived from the directory name.
  """
  # Drop trailing separators first: os.path.basename() returns '' for a path
  # that ends in one, which would produce the useless url 'https://'.
  separators = os.sep + (os.altsep or '')
  dirname = os.path.basename(path.rstrip(separators))
  netpath = re.sub(r'\b-\b', '/', dirname).replace('--', '-')
  return 'https://%s' % netpath
183 def FindExecutable(executable):
184 """This mimics the "which" utility."""
185 path_folders = os.environ.get('PATH').split(os.pathsep)
187 for path_folder in path_folders:
188 target = os.path.join(path_folder, executable)
189 # Just in case we have some ~/blah paths.
190 target = os.path.abspath(os.path.expanduser(target))
191 if os.path.isfile(target) and os.access(target, os.X_OK):
193 if sys.platform.startswith('win'):
194 for suffix in ('.bat', '.cmd', '.exe'):
195 alt_target = target + suffix
196 if os.path.isfile(alt_target) and os.access(alt_target, os.X_OK):
def SetCachePath(cls, cachepath):
  """Record `cachepath` as the class-wide cache directory.

  The attribute is assigned while holding cls.cachepath_lock.
  """
  guard = cls.cachepath_lock
  guard.acquire()
  try:
    cls.cachepath = cachepath
  finally:
    guard.release()
206 def GetCachePath(cls):
207 with cls.cachepath_lock:
208 if not hasattr(cls, 'cachepath'):
210 cachepath = subprocess.check_output(
211 [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip()
212 except subprocess.CalledProcessError:
216 'No global cache.cachepath git configuration found.')
217 setattr(cls, 'cachepath', cachepath)
218 return getattr(cls, 'cachepath')
def RunGit(self, cmd, **kwargs):
  """Run git in a subprocess.

  Args:
    cmd: list of git arguments, without the git executable itself.
    **kwargs: forwarded to gclient_utils.CheckCallAndFilter(). Defaults are
        filled in for 'cwd' (self.mirror_path), 'print_stdout' (False),
        'filter_fn' (self.print) and 'env' (a copy of os.environ).
  """
  cwd = kwargs.setdefault('cwd', self.mirror_path)
  kwargs.setdefault('print_stdout', False)
  kwargs.setdefault('filter_fn', self.print)
  # Work on a private copy of the environment, so an explicit env=None falls
  # back to os.environ and a caller-supplied dict is never modified in place.
  base_env = kwargs.get('env')
  env = dict(os.environ if base_env is None else base_env)
  # NOTE(review): presumably set so git/ssh never wait on an interactive
  # credential prompt -- confirm. Values already present are left untouched.
  env.setdefault('GIT_ASKPASS', 'true')
  env.setdefault('SSH_ASKPASS', 'true')
  kwargs['env'] = env
  self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
  gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)
231 def config(self, cwd=None):
233 cwd = self.mirror_path
235 # Don't run git-gc in a daemon. Bad things can happen if it gets killed.
236 self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
238 # Don't combine pack files into one big pack file. It's really slow for
239 # repositories, and there's no way to track progress and make sure it's
241 self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)
243 # Allocate more RAM for caching delta chains, for better performance
244 # of "Resolving deltas".
245 self.RunGit(['config', 'core.deltaBaseCacheLimit',
246 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)
248 self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
249 self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
250 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
251 for ref in self.refs:
252 ref = ref.lstrip('+').rstrip('/')
253 if ref.startswith('refs/'):
254 refspec = '+%s:%s' % (ref, ref)
255 regex = r'\+%s:.*' % ref.replace('*', r'\*')
257 refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
258 regex = r'\+refs/heads/%s:.*' % ref.replace('*', r'\*')
260 ['config', '--replace-all', 'remote.origin.fetch', refspec, regex],
263 def bootstrap_repo(self, directory):
264 """Bootstrap the repo from Google Storage if possible.
266 More aptly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().
269 python_fallback = False
270 if sys.platform.startswith('win') and not self.FindExecutable('7z'):
271 python_fallback = True
272 elif sys.platform.startswith('darwin'):
273 # The OSX version of unzip doesn't support zip64.
274 python_fallback = True
275 elif not self.FindExecutable('unzip'):
276 python_fallback = True
278 gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
279 gsutil = Gsutil(self.gsutil_exe, boto_path=None, bypass_prodaccess=True)
280 # Get the most recent version of the zipfile.
281 _, ls_out, _ = gsutil.check_call('ls', gs_folder)
282 ls_out_sorted = sorted(ls_out.splitlines())
283 if not ls_out_sorted:
284 # This repo is not on Google Storage.
286 latest_checkout = ls_out_sorted[-1]
288 # Download zip file to a temporary directory.
290 tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
291 self.print('Downloading %s' % latest_checkout)
292 code = gsutil.call('cp', latest_checkout, tempdir)
295 filename = os.path.join(tempdir, latest_checkout.split('/')[-1])
297 # Unpack the file with 7z on Windows, unzip on linux, or fallback.
298 if not python_fallback:
299 if sys.platform.startswith('win'):
300 cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
302 cmd = ['unzip', filename, '-d', directory]
303 retcode = subprocess.call(cmd)
306 with zipfile.ZipFile(filename, 'r') as f:
308 f.extractall(directory)
309 except Exception as e:
310 self.print('Encountered error: %s' % str(e), file=sys.stderr)
315 # Clean up the downloaded zipfile.
316 gclient_utils.rmtree(tempdir)
320 'Extracting bootstrap zipfile %s failed.\n'
321 'Resuming normal operations.' % filename)
326 return os.path.isfile(os.path.join(self.mirror_path, 'config'))
328 def _ensure_bootstrapped(self, depth, bootstrap, force=False):
330 config_file = os.path.join(self.mirror_path, 'config')
331 pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
334 if os.path.isdir(pack_dir):
335 pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
337 should_bootstrap = (force or
338 not os.path.exists(config_file) or
339 len(pack_files) > GC_AUTOPACKLIMIT)
341 tempdir = tempfile.mkdtemp(
342 prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
343 bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
345 # Bootstrap succeeded; delete previous cache, if any.
346 gclient_utils.rmtree(self.mirror_path)
347 elif not os.path.exists(config_file):
348 # Bootstrap failed, no previous cache; start with a bare git dir.
349 self.RunGit(['init', '--bare'], cwd=tempdir)
351 # Bootstrap failed, previous cache exists; warn and continue.
353 'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
354 'but failed. Continuing with non-optimized repository.'
356 gclient_utils.rmtree(tempdir)
359 if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
361 'Shallow fetch requested, but repo cache already exists.')
364 def _fetch(self, rundir, verbose, depth):
369 v = ['-v', '--progress']
371 d = ['--depth', str(depth)]
372 fetch_cmd = ['fetch'] + v + d + ['origin']
373 fetch_specs = subprocess.check_output(
374 [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
375 cwd=rundir).strip().splitlines()
376 for spec in fetch_specs:
378 self.print('Fetching %s' % spec)
379 self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
380 except subprocess.CalledProcessError:
381 if spec == '+refs/heads/*:refs/heads/*':
382 raise RefsHeadsFailedToFetch
383 logging.warn('Fetch of %s failed' % spec)
385 def populate(self, depth=None, shallow=False, bootstrap=False,
386 verbose=False, ignore_lock=False):
387 assert self.GetCachePath()
388 if shallow and not depth:
390 gclient_utils.safe_makedirs(self.GetCachePath())
392 lockfile = Lockfile(self.mirror_path)
398 tempdir = self._ensure_bootstrapped(depth, bootstrap)
399 rundir = tempdir or self.mirror_path
400 self._fetch(rundir, verbose, depth)
401 except RefsHeadsFailedToFetch:
402 # This is a major failure, we need to clean and force a bootstrap.
403 gclient_utils.rmtree(rundir)
404 self.print(GIT_CACHE_CORRUPT_MESSAGE)
405 tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
407 self._fetch(tempdir or self.mirror_path, verbose, depth)
411 if os.path.exists(self.mirror_path):
412 gclient_utils.rmtree(self.mirror_path)
413 os.rename(tempdir, self.mirror_path)
415 # This is somehow racy on Windows.
416 # Catching OSError because WindowsError isn't portable and
418 self.print('Error moving %s to %s: %s' % (tempdir, self.mirror_path,
423 def update_bootstrap(self, prune=False):
424 # The files are named <git number>.zip
425 gen_number = subprocess.check_output(
426 [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
427 # Run Garbage Collect to compress packfile.
428 self.RunGit(['gc', '--prune=all'])
429 # Creating a temp file and then deleting it ensures we can use this name.
430 _, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
431 os.remove(tmp_zipfile)
432 subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
433 gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
434 gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
435 dest_name = '%s/%s.zip' % (gs_folder, gen_number)
436 gsutil.call('cp', tmp_zipfile, dest_name)
437 os.remove(tmp_zipfile)
439 # Remove all other files in the same directory.
441 _, ls_out, _ = gsutil.check_call('ls', gs_folder)
442 for filename in ls_out.splitlines():
443 if filename == dest_name:
445 gsutil.call('rm', filename)
448 def DeleteTmpPackFiles(path):
449 pack_dir = os.path.join(path, 'objects', 'pack')
450 if not os.path.isdir(pack_dir):
452 pack_files = [f for f in os.listdir(pack_dir) if
453 f.startswith('.tmp-') or f.startswith('tmp_pack_')]
455 f = os.path.join(pack_dir, f)
458 logging.warn('Deleted stale temporary pack file %s' % f)
460 logging.warn('Unable to delete temporary pack file %s' % f)
463 def BreakLocks(cls, path):
468 # Look for lock files that might have been left behind by an interrupted
470 lf = os.path.join(path, 'config.lock')
471 if os.path.exists(lf):
474 cls.DeleteTmpPackFiles(path)
478 return self.BreakLocks(self.mirror_path)
482 cachepath = cls.GetCachePath()
485 dirlist = os.listdir(cachepath)
486 repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
487 if os.path.isdir(os.path.join(cachepath, path))])
488 for dirent in dirlist:
489 if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
490 gclient_utils.rmtree(os.path.join(cachepath, dirent))
491 elif (dirent.endswith('.lock') and
492 os.path.isfile(os.path.join(cachepath, dirent))):
493 repo_dirs.add(os.path.join(cachepath, dirent[:-5]))
496 for repo_dir in repo_dirs:
497 if cls.BreakLocks(repo_dir):
498 unlocked_repos.append(repo_dir)
500 return unlocked_repos
502 @subcommand.usage('[url of repo to check for caching]')
503 def CMDexists(parser, args):
504 """Check to see if there already is a cache of the given repo."""
505 _, args = parser.parse_args(args)
506 if not len(args) == 1:
507 parser.error('git cache exists only takes exactly one repo url.')
511 print(mirror.mirror_path)
516 @subcommand.usage('[url of repo to create a bootstrap zip file]')
517 def CMDupdate_bootstrap(parser, args):
518 """Create and uploads a bootstrap tarball."""
519 # Let's just assert we can't do this on Windows.
520 if sys.platform.startswith('win'):
521 print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
524 parser.add_option('--prune', action='store_true',
525 help='Prune all other cached zipballs of the same repo.')
527 # First, we need to ensure the cache is populated.
528 populate_args = args[:]
529 populate_args.append('--no-bootstrap')
530 CMDpopulate(parser, populate_args)
532 # Get the repo directory.
533 options, args = parser.parse_args(args)
536 mirror.update_bootstrap(options.prune)
540 @subcommand.usage('[url of repo to add to or update in cache]')
541 def CMDpopulate(parser, args):
542 """Ensure that the cache has all up-to-date objects for the given repo."""
543 parser.add_option('--depth', type='int',
544 help='Only cache DEPTH commits of history')
545 parser.add_option('--shallow', '-s', action='store_true',
546 help='Only cache 10000 commits of history')
547 parser.add_option('--ref', action='append',
548 help='Specify additional refs to be fetched')
549 parser.add_option('--no_bootstrap', '--no-bootstrap',
551 help='Don\'t bootstrap from Google Storage')
552 parser.add_option('--ignore_locks', '--ignore-locks',
554 help='Don\'t try to lock repository')
556 options, args = parser.parse_args(args)
557 if not len(args) == 1:
558 parser.error('git cache populate only takes exactly one repo url.')
561 mirror = Mirror(url, refs=options.ref)
563 'verbose': options.verbose,
564 'shallow': options.shallow,
565 'bootstrap': not options.no_bootstrap,
566 'ignore_lock': options.ignore_locks,
569 kwargs['depth'] = options.depth
570 mirror.populate(**kwargs)
573 @subcommand.usage('Fetch new commits into cache and current checkout')
574 def CMDfetch(parser, args):
575 """Update mirror, and fetch in cwd."""
576 parser.add_option('--all', action='store_true', help='Fetch all remotes')
577 options, args = parser.parse_args(args)
579 # Figure out which remotes to fetch. This mimics the behavior of regular
580 # 'git fetch'. Note that in the case of "stacked" or "pipelined" branches,
581 # this will NOT try to traverse up the branching structure to find the
582 # ultimate remote to update.
585 assert not args, 'fatal: fetch --all does not take a repository argument'
586 remotes = subprocess.check_output([Mirror.git_exe, 'remote']).splitlines()
590 current_branch = subprocess.check_output(
591 [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
592 if current_branch != 'HEAD':
593 upstream = subprocess.check_output(
594 [Mirror.git_exe, 'config', 'branch.%s.remote' % current_branch]
596 if upstream and upstream != '.':
601 cachepath = Mirror.GetCachePath()
602 git_dir = os.path.abspath(subprocess.check_output(
603 [Mirror.git_exe, 'rev-parse', '--git-dir']))
604 git_dir = os.path.abspath(git_dir)
605 if git_dir.startswith(cachepath):
606 mirror = Mirror.FromPath(git_dir)
609 for remote in remotes:
610 remote_url = subprocess.check_output(
611 [Mirror.git_exe, 'config', 'remote.%s.url' % remote]).strip()
612 if remote_url.startswith(cachepath):
613 mirror = Mirror.FromPath(remote_url)
614 mirror.print = lambda *args: None
615 print('Updating git cache...')
617 subprocess.check_call([Mirror.git_exe, 'fetch', remote])
621 @subcommand.usage('[url of repo to unlock, or -a|--all]')
622 def CMDunlock(parser, args):
623 """Unlock one or all repos if their lock files are still around."""
624 parser.add_option('--force', '-f', action='store_true',
625 help='Actually perform the action')
626 parser.add_option('--all', '-a', action='store_true',
627 help='Unlock all repository caches')
628 options, args = parser.parse_args(args)
629 if len(args) > 1 or (len(args) == 0 and not options.all):
630 parser.error('git cache unlock takes exactly one repo url, or --all')
632 if not options.force:
633 cachepath = Mirror.GetCachePath()
634 lockfiles = [os.path.join(cachepath, path)
635 for path in os.listdir(cachepath)
636 if path.endswith('.lock') and os.path.isfile(path)]
637 parser.error('git cache unlock requires -f|--force to do anything. '
638 'Refusing to unlock the following repo caches: '
639 ', '.join(lockfiles))
643 unlocked_repos.extend(Mirror.UnlockAll())
647 unlocked_repos.append(m.mirror_path)
650 logging.info('Broke locks on these caches:\n %s' % '\n '.join(
654 class OptionParser(optparse.OptionParser):
655 """Wrapper class for OptionParser to handle global options."""
657 def __init__(self, *args, **kwargs):
658 optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
659 self.add_option('-c', '--cache-dir',
660 help='Path to the directory containing the cache')
661 self.add_option('-v', '--verbose', action='count', default=1,
662 help='Increase verbosity (can be passed multiple times)')
663 self.add_option('-q', '--quiet', action='store_true',
664 help='Suppress all extraneous output')
666 def parse_args(self, args=None, values=None):
667 options, args = optparse.OptionParser.parse_args(self, args, values)
671 levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
672 logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])
675 global_cache_dir = Mirror.GetCachePath()
677 global_cache_dir = None
678 if options.cache_dir:
679 if global_cache_dir and (
680 os.path.abspath(options.cache_dir) !=
681 os.path.abspath(global_cache_dir)):
682 logging.warn('Overriding globally-configured cache directory.')
683 Mirror.SetCachePath(options.cache_dir)
689 dispatcher = subcommand.CommandDispatcher(__name__)
690 return dispatcher.execute(OptionParser(), argv)
if __name__ == '__main__':
  # Exit with whatever main() returns for the command-line arguments.
  raise SystemExit(main(sys.argv[1:]))