repocache: implement refs hack for fetching
author: Markus Lehtonen <markus.lehtonen@linux.intel.com>
Wed, 17 Sep 2014 12:14:49 +0000 (15:14 +0300)
committer: Markus Lehtonen <markus.lehtonen@linux.intel.com>
Thu, 18 Sep 2014 10:29:39 +0000 (13:29 +0300)
Allows fetching/cloning remote repositories that have refs/heads/*
pointing to tag objects. The hack is disabled by default.

Gerrit allows setting refs/heads/* to point to tag objects. However, git
refuses to fetch such refs into refs/heads/*, which causes problems for
repocache. Fetch will fail with something like:
  error: Trying to write non-commit object x to branch refs/heads/y

A hackish workaround for this is to make GIT_DIR/refs a symlink. First,
point GIT_DIR/refs to GIT_DIR/refs.alt and fetch all refs into refs/fetch/*
(that is, into GIT_DIR/refs.alt/fetch/*). Then, re-point GIT_DIR/refs to
GIT_DIR/refs.alt/fetch/.

The symlink hack is automatically removed from the filesystem of the cached
repository if a CachedRepo instance (using the same cached repo) is created
with the hack disabled.

Change-Id: I1addf10fa5f805ab8f2b696e8ccaad9e56967bd0
Signed-off-by: Markus Lehtonen <markus.lehtonen@linux.intel.com>
gbp_repocache/__init__.py
tests/test_gbp_repocache.py

index c129baebcf13ece09630eac3dfb1865147d9f1c5..ae600f1f8f1299c07072761717c5e37469768e60 100644 (file)
@@ -75,15 +75,65 @@ class MirrorGitRepository(GitRepository): # pylint: disable=R0904
             except IOError as err:
                 raise GitRepositoryError('Failed write ref %s: %s' % (ref, err))
 
-    def force_fetch(self):
+    def _symlink_refs(self, tgt_path):
+        """Make GIT_DIR/refs a symlink pointing to tgt_path.
+
+        tgt_path is used verbatim as the link target and is resolved against
+        GIT_DIR, so a relative path yields a relative symlink inside GIT_DIR.
+        The target directory is created if it does not exist. Whatever
+        currently occupies GIT_DIR/refs (a directory, a file or a symlink,
+        dangling or not) is removed before the new link is created.
+        """
+        tgt_abspath = os.path.abspath(os.path.join(self.git_dir, tgt_path))
+        refs_path = os.path.join(self.git_dir, 'refs')
+        # Create symlink target directory
+        if not os.path.exists(tgt_abspath):
+            os.makedirs(tgt_abspath)
+        # Remove existing directory or symlink. The symlink case is checked
+        # first because os.path.exists() follows links and returns False for
+        # a dangling one; such a link would otherwise be left in place and
+        # make os.symlink() below fail.
+        if os.path.islink(refs_path):
+            os.unlink(refs_path)
+        elif os.path.isdir(refs_path):
+            LOGGER.info('Removing old refs directory %s', refs_path)
+            shutil.rmtree(refs_path)
+        elif os.path.exists(refs_path):
+            # Stray regular file, rmtree() would refuse to remove it
+            os.unlink(refs_path)
+
+        # Logged as "link -> target", matching the direction of the symlink
+        LOGGER.debug("Symlinking %s -> %s", refs_path, tgt_path)
+        os.symlink(tgt_path, refs_path)
+
+    def force_fetch(self, refs_hack=False):
         """Fetch with specific arguments"""
         # Set HEAD temporarily as fetch with an invalid non-symbolic HEAD fails
         orig_head = self.get_ref('HEAD')
         self.set_ref('HEAD', 'refs/heads/non-existent-tmp-for-fetching')
 
-        # Update all refs
+        if refs_hack:
+            # Create temporary refs directory for fetching
+            # We need this because Gerrit is able to create refs/heads/* that
+            # git refuses to fetch (to refs/heads/*), more specifically
+            # branches pointing to tag objects
+            alt_refs_root = 'refs.alt'
+            alt_refs = os.path.join(alt_refs_root, 'fetch')
+            self._symlink_refs(alt_refs_root)
+
+            # Remove possible packed refs as they are not aligned with refs
+            # after the hackish fetch, e.g. packed refs might contain refs that
+            # do not exist in remote anymore
+            packed_refs = os.path.join(self.git_dir, 'packed-refs')
+            if os.path.exists(packed_refs):
+                os.unlink(packed_refs)
+
+            # Fetch all refs into alternate namespace
+            refspec = '+refs/*:refs/fetch/*'
+        else:
+            # Remove possible refs symlink
+            refs_path = os.path.join(self.git_dir, 'refs')
+            if os.path.islink(refs_path):
+                # Remove link target directory
+                link_tgt = os.path.join(self.git_dir, os.readlink(refs_path))
+                LOGGER.debug('Removing refs symlink and link target %s',
+                             link_tgt)
+                shutil.rmtree(link_tgt)
+                # Remove link and create empty refs directory
+                os.unlink(refs_path)
+                os.mkdir(refs_path)
+
+            # Update all refs
+            refspec = '+refs/*:refs/*'
+
         try:
-            self._git_command('fetch', ['-q', '-u', '-p', 'origin'])
+            self._git_command('fetch', ['-q', '-u', '-p', 'origin', refspec])
             try:
                 # Fetch remote HEAD separately
                 self._git_command('fetch', ['-q', '-u', 'origin', 'HEAD'])
@@ -93,6 +143,8 @@ class MirrorGitRepository(GitRepository): # pylint: disable=R0904
                              '0000000000000000000000000000000000000000')
         finally:
             self.set_ref('HEAD', orig_head)
+            if refs_hack:
+                self._symlink_refs(alt_refs)
 
     def force_checkout(self, commitish):
         """Checkout commitish"""
@@ -103,7 +155,7 @@ class MirrorGitRepository(GitRepository): # pylint: disable=R0904
         self._git_command('clean', ['-f', '-f', '-d', '-x'])
 
     @classmethod
-    def clone(cls, path, url, bare=False):
+    def clone(cls, path, url, bare=False, refs_hack=False):
         """Create a mirrored clone"""
         if bare:
             return super(MirrorGitRepository, cls).clone(path, url,
@@ -113,8 +165,11 @@ class MirrorGitRepository(GitRepository): # pylint: disable=R0904
             LOGGER.debug('Initializing non-bare mirrored repo')
             repo = cls.create(path)
             repo.add_remote_repo('origin', url)
+            # The refspec is a bit useless as we now use refspec in
+            # force_fetch(). But, it's better to have it in config as well
+            # in case somebody somewhere would use the regular fetch() method.
             repo.set_config('remote.origin.fetch', '+refs/*:refs/*', True)
-            repo.force_fetch()
+            repo.force_fetch(refs_hack)
             return repo
 
     def list_tags(self, obj):
@@ -166,11 +221,12 @@ class CachedRepoError(Exception):
 class CachedRepo(object):
     """Object representing a cached repository"""
 
-    def __init__(self, base_dir, url, bare=False):
+    def __init__(self, base_dir, url, bare=False, refs_hack=False):
         self._basedir = base_dir
         self._repodir = None
         self._repo = None
         self._lock = None
+        self._refs_hack = refs_hack
 
         # Safe repo dir name
         urlbase, reponame = self._split_url(url)
@@ -253,7 +309,7 @@ class CachedRepo(object):
             else:
                 LOGGER.info('Fetching from remote')
                 try:
-                    self._repo.force_fetch()
+                    self._repo.force_fetch(refs_hack=self._refs_hack)
                 except GitRepositoryError as err:
                     raise CachedRepoError('Failed to fetch from remote: %s' %
                                            err)
@@ -261,7 +317,7 @@ class CachedRepo(object):
             LOGGER.info('Cloning from %s', url)
             try:
                 self._repo = MirrorGitRepository.clone(self._repodir, url,
-                                                      bare=bare)
+                                    bare=bare, refs_hack=self._refs_hack)
             except GitRepositoryError as err:
                 raise CachedRepoError('Failed to clone: %s' % err)
 
index be54c053a5ae84f5b22be578d76340eff9b06de4..6e6457ba7e9f03082c89d18c857531daf65ad688 100644 (file)
@@ -139,6 +139,21 @@ class TestMirrorGitRepository(UnitTestsBase):
         del os.environ['GIT_COMMITTER_DATE']
 
 
+    def test_refs_hack(self):
+        """Verify that fetching adds and removes the refs symlink"""
+        mirror = MirrorGitRepository.clone('testrepo', self.orig_repo.path,
+                                           refs_hack=True)
+        refs_dir = os.path.join(mirror.git_dir, 'refs')
+
+        def refs_is_symlink():
+            """Tell whether GIT_DIR/refs is currently a symbolic link"""
+            return os.path.islink(refs_dir)
+
+        # A clone made with the hack enabled has refs as a symlink
+        ok_(refs_is_symlink())
+
+        # Fetching with the hack disabled (the default) removes the symlink
+        mirror.force_fetch()
+        ok_(not refs_is_symlink())
+
+        # Fetching with the hack enabled again brings the symlink back
+        mirror.force_fetch(refs_hack=True)
+        ok_(refs_is_symlink())
+
 class TestCachedRepo(UnitTestsBase):
     """Test CachedRepo class"""
 
@@ -172,8 +187,8 @@ class TestCachedRepo(UnitTestsBase):
         repo.close()
         # Make new commit in "upstream"
         self.update_repository_file(self.orig_repo, 'foo.txt', 'more data\n')
-        # Fetch
-        repo = self.MockCachedRepo(self.orig_repo.path)
+        # Fetch, with refs_hack enabled
+        repo = self.MockCachedRepo(self.orig_repo.path, refs_hack=True)
         ok_(repo)
         eq_(path, repo.repo.path)
         ok_(sha != repo.repo.rev_parse('master'))