1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
8 import xml.dom.minidom as xml
9 from xml.parsers.expat import ExpatError
11 from appengine_url_fetcher import AppEngineUrlFetcher
12 from docs_server_utils import StringIdentity
13 from file_system import (
14 FileNotFoundError, FileSystem, FileSystemError, StatInfo, ToUnicode)
15 from future import Future
def _ParseHTML(html):
  '''Parses |html| into a minidom document, tolerating mismatched tags.

  Unfortunately, the viewvc page has a stray </div> tag, so this takes care
  of all mismatched tags: whenever expat reports an error, the line it points
  at is deleted and parsing is retried on what is left.

  Returns the document produced by xml.parseString().

  Raises ExpatError if the parser reports a line that does not exist, i.e.
  when there is nothing left to delete (for example for empty input).
  '''
  try:
    return xml.parseString(html)
  except ExpatError as e:
    error = e

  # Only split once parsing has failed, so that well-formed input is handed
  # to the parser untouched.
  lines = html.split('\n')
  while True:
    # expat line numbers are 1-based.
    bad_index = error.lineno - 1
    if not 0 <= bad_index < len(lines):
      # Deleting a line cannot help any more; surface the parse error rather
      # than retrying forever.
      raise error
    del lines[bad_index]
    try:
      return xml.parseString('\n'.join(lines))
    except ExpatError as e:
      error = e
def _InnerText(node):
  '''Like node.innerText in JS DOM, but strips surrounding whitespace.

  Joins the node's own value (when it has one) with the inner text of each of
  its children, in order. Every child's text is stripped before it is joined,
  and the joined result is stripped again.
  '''
  text = []
  # NOTE(review): the guard on this append is not visible in the reviewed
  # excerpt; a truthiness check is assumed so that a nodeValue of None is
  # never handed to ''.join() - confirm against the full file.
  if node.nodeValue:
    text.append(node.nodeValue)
  # Nodes without a childNodes attribute simply contribute no children.
  for child_node in getattr(node, 'childNodes', ()):
    text.append(_InnerText(child_node))
  return ''.join(text).strip()
def _CreateStatInfo(html):
  '''Builds a StatInfo from the HTML of a viewvc directory listing.

  Returns StatInfo(parent_version, child_versions), where |parent_version| is
  the text of the first link in the "Directory revision:" row (None if no
  such row was found) and |child_versions| maps each listed name to its
  version as a string.

  Raises FileSystemError if the "Directory revision:" cell does not contain
  exactly 2 links, or if a second directory revision is found. Raises
  ValueError if the directory revision is not an integer.
  '''
  # NOTE(review): the initialisers, the row loop header, the len() guards and
  # the continue/break statements are not visible in the reviewed excerpt;
  # they are reconstructed from how the visible lines use them - confirm
  # against the full file.
  parent_version = None
  child_versions = {}

  # Try all of the tables until we find the ones that contain the data (the
  # directory and file versions are in different tables).
  for table in _ParseHTML(html).getElementsByTagName('table'):
    # Within the table there is a list of files. However, there may be some
    # things beforehand; a header, "parent directory" list, etc. We will deal
    # with that below by being generous and just ignoring such rows.
    rows = table.getElementsByTagName('tr')

    for row in rows:
      cells = row.getElementsByTagName('td')

      # The version of the directory will eventually appear in the soup of
      # table rows, like this:
      #
      # <tr>
      #   <td>Directory revision:</td>
      #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
      # </tr>
      #
      # So look out for that.
      if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
        links = cells[1].getElementsByTagName('a')
        if len(links) != 2:
          raise FileSystemError('ViewVC assumption invalid: directory '
                                'revision content did not have 2 <a> '
                                ' elements, instead %s' % _InnerText(cells[1]))
        this_parent_version = _InnerText(links[0])
        int(this_parent_version)  # sanity check
        if parent_version is not None:
          # The pieces are joined by adjacent-literal concatenation so that
          # '%' formats the whole message. With '+', '%' applied to the last
          # piece only and failed with a TypeError.
          raise FileSystemError('There was already a parent version %s, and '
                                ' we just found a second at %s' %
                                (parent_version, this_parent_version))
        parent_version = this_parent_version

      # The version of each file is a list of rows with 5 cells: name, version,
      # age, author, and last log entry. Maybe the columns will change; we're
      # at the mercy of viewvc, but this constant can be easily updated.
      if len(cells) != 5:
        continue
      name_element, version_element = cells[0], cells[1]

      name = _InnerText(name_element)  # note: will end in / for directories
      try:
        version = int(_InnerText(version_element))
      except ValueError:
        # Not a file row after all (e.g. a header); ignore it.
        continue
      child_versions[name] = str(version)

    # Stop as soon as both pieces of information have been collected.
    if parent_version and child_versions:
      break

  return StatInfo(parent_version, child_versions)
class _AsyncFetchFuture(object):
  '''Resolves a batch of asynchronous fetches into a {path: content} dict.

  The constructor starts one fetcher.FetchAsync() call per path (appending
  "?<args>" to the path when |args| is given). Get() then waits on each of
  them in turn.
  '''
  def __init__(self, paths, fetcher, binary, args=None):
    def apply_args(path):
      return path if args is None else '%s?%s' % (path, args)
    # A list of tuples of the form (path, Future).
    self._fetches = [(path, fetcher.FetchAsync(apply_args(path)))
                     for path in paths]
    # NOTE(review): these two initialisers are not visible in the reviewed
    # excerpt; they are reconstructed from how Get() uses the attributes.
    self._value = {}
    self._error = None
    self._binary = binary

  def _ListDir(self, directory):
    '''Returns the text of the links found in the |directory| listing markup.

    Anchors without any child node are skipped instead of raising IndexError.
    '''
    dom = xml.parseString(directory)
    files = [elem.childNodes[0].data
             for elem in dom.getElementsByTagName('a')
             if elem.childNodes]
    # NOTE(review): the end of this method is not visible in the reviewed
    # excerpt; dropping the '..' parent link before returning is assumed -
    # confirm against the full file.
    if '..' in files:
      files.remove('..')
    return files

  def Get(self):
    '''Waits for every fetch and returns a dict of path -> content.

    Paths ending in '/' map to the list produced by _ListDir(); other paths
    map to ToUnicode(content), or to the raw content when |binary| was set.

    Raises FileNotFoundError on a 404 response, and FileSystemError when a
    fetch raises or returns any other non-200 status.
    '''
    for path, future in self._fetches:
      try:
        result = future.Get()
      except Exception:
        raise FileSystemError('Error fetching %s for Get: %s' %
                              (path, traceback.format_exc()))

      if result.status_code == 404:
        raise FileNotFoundError('Got 404 when fetching %s for Get, content %s' %
                                (path, result.content))
      if result.status_code != 200:
        raise FileSystemError('Got %s when fetching %s for Get, content %s' %
                              (result.status_code, path, result.content))

      if path.endswith('/'):
        self._value[path] = self._ListDir(result.content)
      elif not self._binary:
        self._value[path] = ToUnicode(result.content)
      else:
        self._value[path] = result.content
    if self._error is not None:
      raise self._error
    return self._value
class SubversionFileSystem(FileSystem):
  '''Class to fetch resources from src.chromium.org.

  File contents are read through |file_fetcher|; version information comes
  from the pages fetched through |stat_fetcher| and parsed by
  _CreateStatInfo(). When |revision| is set, both kinds of request are pinned
  to it through a query parameter.
  '''
  @staticmethod
  def Create(branch='trunk', revision=None):
    '''Returns a SubversionFileSystem rooted at the extensions path of |branch|.

    'trunk' maps to trunk/src/...; any other value maps to
    branches/<branch>/src/....
    '''
    if branch == 'trunk':
      svn_path = 'trunk/src/%s' % svn_constants.EXTENSIONS_PATH
    else:
      svn_path = 'branches/%s/src/%s' % (branch, svn_constants.EXTENSIONS_PATH)
    # NOTE(review): the last two arguments are not visible in the reviewed
    # excerpt; they are reconstructed from the __init__ signature.
    return SubversionFileSystem(
        AppEngineUrlFetcher('%s/%s' % (url_constants.SVN_URL, svn_path)),
        AppEngineUrlFetcher('%s/%s' % (url_constants.VIEWVC_URL, svn_path)),
        svn_path,
        revision=revision)

  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    self._file_fetcher = file_fetcher
    self._stat_fetcher = stat_fetcher
    self._svn_path = svn_path
    self._revision = revision

  def Read(self, paths, binary=False):
    '''Returns a Future that resolves to a dict of path -> content.'''
    args = None
    if self._revision is not None:
      # |fetcher| gets from svn.chromium.org which uses p= for version.
      args = 'p=%s' % self._revision
    # NOTE(review): the trailing arguments are not visible in the reviewed
    # excerpt; they are reconstructed from _AsyncFetchFuture's signature.
    return Future(delegate=_AsyncFetchFuture(paths,
                                             self._file_fetcher,
                                             binary,
                                             args=args))

  def Stat(self, path):
    '''Returns the StatInfo for |path|.

    The listing of the directory containing |path| is fetched and parsed. For
    a path ending in '/' the parsed StatInfo is returned as is; otherwise a
    StatInfo holding only that file's version is returned.

    Raises FileSystemError if the fetch raises or no directory version is
    found, and FileNotFoundError on any non-200 response or when the file is
    not present in the listing.
    '''
    directory, filename = posixpath.split(path)
    # NOTE(review): one statement is missing at this point in the reviewed
    # excerpt; appending the trailing slash is assumed - confirm against the
    # full file.
    directory += '/'
    if self._revision is not None:
      # |stat_fetch| uses viewvc which uses pathrev= for version.
      directory += '?pathrev=%s' % self._revision

    try:
      result = self._stat_fetcher.Fetch(directory)
    except Exception:
      raise FileSystemError('Error fetching %s for Stat: %s' %
                            (path, traceback.format_exc()))

    if result.status_code == 404:
      raise FileNotFoundError('Got 404 when fetching %s for Stat, content %s' %
                              (path, result.content))
    if result.status_code != 200:
      # NOTE(review): _AsyncFetchFuture raises FileSystemError for the same
      # condition. The exception type is kept here because callers may rely
      # on it - confirm whether FileNotFoundError is intended.
      raise FileNotFoundError('Got %s when fetching %s for Stat, content %s' %
                              (result.status_code, path, result.content))

    stat_info = _CreateStatInfo(result.content)
    if stat_info.version is None:
      raise FileSystemError('Failed to find version of dir %s' % directory)
    if path.endswith('/'):
      return stat_info
    if filename not in stat_info.child_versions:
      raise FileNotFoundError(
          '%s from %s was not in child versions for Stat' % (filename, path))
    return StatInfo(stat_info.child_versions[filename])

  def GetIdentity(self):
    # NOTE: no revision here, since it would mess up the caching of reads. It
    # probably doesn't matter since all the caching classes will use the result
    # of Stat to decide whether to re-read - and Stat has a ceiling of the
    # revision - so when the revision changes, so might Stat. That is enough.
    return '@'.join((self.__class__.__name__, StringIdentity(self._svn_path)))