1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
6 from base64 import b64decode
7 from itertools import izip
14 from appengine_url_fetcher import AppEngineUrlFetcher
15 from appengine_wrappers import IsDownloadError, app_identity
16 from docs_server_utils import StringIdentity
17 from file_system import (FileNotFoundError,
20 FileSystemThrottledError,
22 from future import All, Future
23 from path_util import AssertIsValid, IsDirectory, ToDirectory
24 from third_party.json_schema_compiler.memoize import memoize
25 from url_constants import (GITILES_BASE,
27 GITILES_BRANCHES_PATH,
# Query string appended to a URL to request the JSON rendering of a Gitiles
# resource (used for directories and for branch commit info).
_JSON_FORMAT = '?format=JSON'
# Query string appended to a URL to request the TEXT rendering of a Gitiles
# resource (used for non-directory paths).
_TEXT_FORMAT = '?format=TEXT'
# Path component inserted right after GITILES_BASE when an access token is
# available.
_AUTH_PATH_PREFIX = '/a'
36 def _ParseGitilesJson(json_data):
37 '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON
38 data coming from Gitiles views.
40 return json.loads(json_data[json_data.find('{'):])
def _CreateStatInfo(json_data):
  '''Builds a StatInfo from the Gitiles JSON view of a tree.

  The first StatInfo argument is the tree's own 'id'; the second is a dict
  mapping each entry's 'name' to that entry's 'id'.
  '''
  parsed = _ParseGitilesJson(json_data)
  # Read the tree ID before walking the entries, matching the order in which
  # a missing key would be reported.
  tree_id = parsed['id']
  child_ids = {}
  for entry in parsed['entries']:
    child_ids[entry['name']] = entry['id']
  return StatInfo(tree_id, child_ids)
class GitilesFileSystem(FileSystem):
  '''Class to fetch filesystem data from the Chromium project's gitiles
  service.
  '''
  @staticmethod
  def Create(branch='master', commit=None):
    '''Returns a GitilesFileSystem backed by a fresh AppEngineUrlFetcher.

    The base URL points at |commit| when one is given; otherwise at master,
    or for any other |branch| at that branch under GITILES_BRANCHES_PATH.
    _AUTH_PATH_PREFIX is inserted after GITILES_BASE whenever app_identity
    hands back an access token.
    '''
    token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    path_prefix = '' if token is None else _AUTH_PATH_PREFIX
    if commit:
      base_url = '%s%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT, commit)
    elif branch == 'master':
      # Compared by value: an identity test ('is') against a string literal
      # only succeeds when both strings happen to be the same object.
      base_url = '%s%s/%s/master' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT)
    else:
      base_url = '%s%s/%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT,
          GITILES_BRANCHES_PATH, branch)
    return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit)

  def __init__(self, fetcher, base_url, branch, commit):
    '''Stores the fetcher, the URL every request is made relative to, and the
    branch/commit this file system was created for (read by GetIdentity).
    '''
    self._fetcher = fetcher
    self._base_url = base_url
    self._branch = branch
    self._commit = commit

  def _FetchAsync(self, url):
    '''Convenience wrapper for fetcher.FetchAsync, so callers don't
    need to use posixpath.join.

    |url| is appended to the base URL with a '/' separator and fetched with
    the current access token. Returns whatever fetcher.FetchAsync returns.
    '''
    AssertIsValid(url)
    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url),
                                    access_token=access_token)

  def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False):
    '''Returns a future to cleanly resolve |fetch_future|.

    On success the future resolves to the response content. Failures are
    translated into file system errors:
      - a download error or a 404 raises FileNotFoundError, unless
        |skip_not_found| is set, in which case the result is None;
      - a 429 is logged and raises FileSystemThrottledError;
      - any other non-200 status, or any other exception from the fetch,
        raises FileSystemError.
    |path| is only used to build the error messages.
    '''
    def handle(e):
      # Error callback for the fetch itself (as opposed to a bad status code).
      if skip_not_found and IsDownloadError(e):
        return None
      exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
      raise exc_type('%s fetching %s for Get from %s: %s' %
          (type(e).__name__, path, self._base_url, traceback.format_exc()))

    def get_content(result):
      if result.status_code == 404:
        if skip_not_found:
          return None
        raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
                                (path, self._base_url))
      if result.status_code == 429:
        # Build the message once; it is both logged and raised.
        throttled_message = (
            'Access throttled when fetching %s for Get from %s' %
            (path, self._base_url))
        logging.warning(throttled_message)
        raise FileSystemThrottledError(throttled_message)
      if result.status_code != 200:
        raise FileSystemError(
            'Got %s when fetching %s for Get from %s, content %s' %
            (result.status_code, path, self._base_url, result.content))
      return result.content

    return fetch_future.Then(get_content, handle)

  def Read(self, paths, skip_not_found=False):
    '''Fetches every path in |paths| and returns a future for the results.

    The future resolves to a dict keyed by path. A directory path maps to a
    list of its entry names, with '/' appended to entries of type 'tree'; any
    other path maps to its base64-decoded content. Paths whose fetch resolved
    to None (possible when |skip_not_found| is set) are left out of the dict.
    '''
    # Directory content is formatted in JSON in Gitiles as follows:
    #
    #   {
    #     "id": "12a5464de48d2c46bc0b2dc78fafed75aab554fa", # The tree ID.
    #     "entries": [
    #       {
    #         "type": ...,  # 'tree' for a sub-directory.
    #         "id": "ab971ca447bc4bce415ed4498369e00164d91cb6", # File ID.
    #         "name": ".gitignore"
    #       },
    #       ...
    #     ]
    #   }
    def list_dir(json_data):
      entries = _ParseGitilesJson(json_data).get('entries', [])
      return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries]

    def fixup_url_format(path):
      # By default, Gitiles URLs display resources in HTML. To get resources
      # suitable for our consumption, a '?format=' string must be appended to
      # the URL. The format may be one of 'JSON' or 'TEXT' for directory or
      # text resources, respectively.
      return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT)

    # A list of tuples of the form (path, Future).
    fetches = [(path, self._FetchAsync(fixup_url_format(path)))
               for path in paths]

    def parse_contents(results):
      value = {}
      for path, content in izip(paths, results):
        if content is None:
          continue
        # Gitiles encodes text content in base64 (see
        # http://tools.ietf.org/html/rfc4648 for info about base64).
        value[path] = (list_dir if IsDirectory(path) else b64decode)(content)
      return value

    return All(self._ResolveFetchContent(path, future, skip_not_found)
               for path, future in fetches).Then(parse_contents)

  def Refresh(self):
    '''Performs no work; returns a Future constructed with an empty tuple as
    its value.
    '''
    return Future(value=())

  @memoize
  def _GetCommitInfo(self, key):
    '''Gets the commit information specified by |key|.

    Returns a future that resolves to the value stored under |key| in the
    commit info JSON for the base URL.

    The JSON view for commit info looks like:
      {
        "commit": "8fd578e1a7b142cd10a4387861f05fb9459b69e2", # Commit ID.
        "tree": "3ade65d8a91eadd009a6c9feea8f87db2c528a53", # Tree ID.
        "parents": [
          "a477c787fe847ae0482329f69b39ce0fde047359" # Previous commit ID.
        ],
        "author": {
          ...
          "time": "Tue Aug 12 17:17:21 2014"
        },
        "committer": {
          ...
          "time": "Tue Aug 12 17:18:28 2014"
        },
        ...
      }
    '''
    # Commit information for a branch is obtained by appending '?format=JSON'
    # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is
    # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves
    # the root directory JSON content, whereas the former serves the branch
    # commit info JSON content.
    #
    # For that reason the fetcher is called directly here: _FetchAsync always
    # inserts a '/' after the base URL.
    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT,
                                            access_token=access_token)
    content_future = self._ResolveFetchContent(self._base_url, fetch_future)
    return content_future.Then(
        lambda content: _ParseGitilesJson(content)[key])

  def GetCommitID(self):
    '''Returns a future that resolves to the commit ID for this branch.
    '''
    return self._GetCommitInfo('commit')

  def GetPreviousCommitID(self):
    '''Returns a future that resolves to the previous commit ID for this branch.

    This is the first element of the commit info's 'parents' list.
    '''
    return self._GetCommitInfo('parents').Then(lambda parents: parents[0])

  def StatAsync(self, path):
    '''Returns a future that resolves to the StatInfo for |path|.

    The JSON listing of the directory part of |path| (as split off by
    posixpath.split) is fetched. For a directory path the StatInfo built from
    that listing is returned as-is; otherwise a StatInfo holding only the ID
    of the entry named by the file part of |path| is returned.

    The future fails with FileSystemError if the listing has no version, and
    with FileNotFoundError if the file is not among the listing's entries.
    '''
    dir_, filename = posixpath.split(path)
    def stat(content):
      stat_info = _CreateStatInfo(content)
      if stat_info.version is None:
        raise FileSystemError('Failed to find version of dir %s' % dir_)
      if IsDirectory(path):
        return stat_info
      if filename not in stat_info.child_versions:
        raise FileNotFoundError(
            '%s from %s was not in child versions for Stat' % (filename, path))
      return StatInfo(stat_info.child_versions[filename])

    fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT)
    return self._ResolveFetchContent(path, fetch_future).Then(stat)

  def GetIdentity(self):
    '''Returns '<class name>@<StringIdentity of the source URL>'.

    The URL includes the branch only for a non-master branch with no pinned
    commit; in every other case it is just the source root.
    '''
    # NOTE: Do not use commit information to create the string identity.
    # Doing so will mess up caching.
    if self._commit is None and self._branch != 'master':
      str_id = '%s/%s/%s/%s' % (
          GITILES_BASE, GITILES_SRC_ROOT, GITILES_BRANCHES_PATH, self._branch)
    else:
      str_id = '%s/%s' % (GITILES_BASE, GITILES_SRC_ROOT)
    return '@'.join((self.__class__.__name__, StringIdentity(str_id)))