2 # Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """A library to assist automatically downloading files.
8 This library is used by scripts that download tarballs, zipfiles, etc. as part
# File name, joined onto a stamp directory, under which the source URL is
# recorded (see WriteSourceStamp).
SOURCE_STAMP = 'SOURCE_URL'
# File name, joined onto a stamp directory, under which the hash value is
# recorded (see WriteHashStamp).
HASH_STAMP = 'SOURCE_SHA1'
class HashError(Exception):
  """Error describing a mismatch between an expected and an actual hash.

  Attributes:
    download_url: the URL the hash check was performed for.
    expected_hash: the hash value that was required.
    actual_hash: the hash value that was actually obtained.
  """

  def __init__(self, download_url, expected_hash, actual_hash):
    self.download_url = download_url
    self.expected_hash = expected_hash
    self.actual_hash = actual_hash

  def __str__(self):
    # The message is built on demand so it always reflects the attributes,
    # including when the exception was constructed with keyword arguments.
    return 'Got hash "%s" but expected hash "%s" for "%s"' % (
        self.actual_hash, self.expected_hash, self.download_url)
def EnsureFileCanBeWritten(filename):
  """Create the parent directory of `filename` if it does not exist yet.

  Args:
    filename: path of a file that is about to be written.

  Raises:
    OSError: if the directory is missing and cannot be created.
  """
  directory = os.path.dirname(filename)
  # A bare file name has an empty directory part; it refers to the current
  # directory, and os.makedirs('') would raise.
  if not directory or os.path.exists(directory):
    return
  try:
    os.makedirs(directory)
  except OSError:
    # Another process may have created the directory between the check above
    # and the makedirs call; only a directory that is still missing is fatal.
    if not os.path.isdir(directory):
      raise
def WriteData(filename, data):
  """Write `data` to `filename`, replacing any previous contents.

  Args:
    filename: path of the file to write; EnsureFileCanBeWritten is called on
        it first.
    data: the content to store; the file is opened in binary mode.
  """
  EnsureFileCanBeWritten(filename)
  # The with-block closes the handle even if the write fails.
  with open(filename, 'wb') as f:
    f.write(data)
def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
  """Copy everything that can be read from `stream` into `filename`.

  Args:
    filename: path of the file to create or overwrite (binary mode);
        EnsureFileCanBeWritten is called on it first.
    stream: object with a read(size) method; an empty result ends the copy.
    chunk_size: size passed to each stream.read call.
    verbose: when true, a progress mark is written to stdout for every chunk
        and a newline once the copy ends (also when it ends with an error).
  """
  EnsureFileCanBeWritten(filename)
  with open(filename, 'wb') as dst:
    try:
      while True:
        data = stream.read(chunk_size)
        if not data:
          break
        dst.write(data)
        if verbose:
          # Indicate that we're still writing.
          sys.stdout.write('.')
          sys.stdout.flush()
    finally:
      # Terminate the progress line so later output starts on a fresh line.
      if verbose:
        sys.stdout.write('\n')
def DoesStampMatch(stampfile, expected, index):
  """Compare one line of a stamp file against an expected value.

  Args:
    stampfile: path of the stamp file to read.
    expected: the text the selected line must be equal to.
    index: index of the line to compare, after splitting the file contents
        on newlines.

  Returns:
    'already up-to-date.' if the selected line equals `expected`;
    'manual override.' if it does not but the contents start with 'manual';
    False if neither applies or the stamp file cannot be read.
  """
  try:
    with open(stampfile, 'r') as f:
      stamp = f.read()
  except IOError:
    # A stamp that is missing or unreadable cannot match anything.
    return False

  try:
    if stamp.split('\n')[index] == expected:
      return 'already up-to-date.'
  except IndexError:
    # The file has fewer lines than `index` asks for: treat as "no match" and
    # still honour a manual override below.
    pass

  if stamp.startswith('manual'):
    return 'manual override.'
  return False
def WriteStamp(stampfile, data):
  """Write `data` as the complete contents of the stamp file `stampfile`.

  Args:
    stampfile: path of the stamp file; EnsureFileCanBeWritten is called on it
        first.
    data: text to store; the file is opened in text mode.
  """
  EnsureFileCanBeWritten(stampfile)
  # The with-block closes the handle even if the write fails.
  with open(stampfile, 'w') as f:
    f.write(data)
def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
  """Decide whether the stamp file `stamp_name` under `path` is current.

  Args:
    path: directory that holds the stamp file.
    stamp_name: file name of the stamp inside `path`.
    stamp_contents: value the stamp is compared with (see DoesStampMatch).
    min_time: optional modification time; a stamp whose mtime is not newer
        than this is treated as stale. None disables the check.
    index: line index within the stamp file that is compared.

  Returns:
    The truthy DoesStampMatch result when the stamp is current (always so for
    a manual override); a false value otherwise.
  """
  stampfile = os.path.join(path, stamp_name)
  stampmatch = DoesStampMatch(stampfile, stamp_contents, index)

  # If toolchain was downloaded and/or created manually then keep it untouched
  if stampmatch == 'manual override.':
    return stampmatch

  # Check if the stampfile is older than the minimum last mod time
  if min_time is not None:
    try:
      stamp_time = os.stat(stampfile).st_mtime
    except OSError:
      # Without a stamp file on disk nothing vouches for the contents.
      return False
    if stamp_time <= min_time:
      return False

  return stampmatch
def WriteSourceStamp(path, url):
  """Record `url` in the SOURCE_STAMP file located in directory `path`."""
  WriteStamp(os.path.join(path, SOURCE_STAMP), url)
def WriteHashStamp(path, hash_val):
  """Record `hash_val` in the HASH_STAMP file located in directory `path`."""
  WriteStamp(os.path.join(path, HASH_STAMP), hash_val)
def _HashFileHandle(fh):
  """sha1 of a file like object.

  The handle is read to its end in fixed-size chunks, so large inputs are
  never held in memory at once. It is closed before returning, because the
  callers in this file pass in a freshly opened handle and do not close it
  themselves.

  Args:
    fh: file handle like object to hash; read(size) must return binary data
        and an empty result at the end.

  Returns:
    sha1 digest of everything read from `fh`, as a hex string.
  """
  hasher = hashlib.sha1()
  try:
    while True:
      chunk = fh.read(65536)
      if not chunk:
        break
      hasher.update(chunk)
  finally:
    fh.close()
  return hasher.hexdigest()
def HashFile(filename):
  """sha1 a file on disk.

  Args:
    filename: filename to hash.

  Returns:
    The result of _HashFileHandle for the file opened in binary mode.
  """
  # The with-block guarantees the handle is released even if hashing raises;
  # closing an already closed file is harmless.
  with open(filename, 'rb') as fh:
    return _HashFileHandle(fh)
def HashUrlByDownloading(url):
  """sha1 the data at an url.

  Args:
    url: url to download from.

  Returns:
    sha1 of the data at the url.

  Raises:
    Any exception raised by urllib2.urlopen; it is re-raised unchanged after
    a note has been written to stderr.
  """
  try:
    fh = urllib2.urlopen(url)
  except Exception:
    # Report which URL failed, then let the caller see the original error.
    sys.stderr.write('Failed fetching URL: %s\n' % url)
    raise
  return _HashFileHandle(fh)
165 # Attempts to get the SHA1 hash of a file given a URL by looking for
166 # an adjacent file with a ".sha1hash" suffix. This saves having to
167 # download a large tarball just to get its hash. Otherwise, we fall
168 # back to downloading the main file.
170 hash_url = '%s.sha1hash' % url
172 fh = urllib2.urlopen(hash_url)
175 except urllib2.HTTPError, exn:
177 return HashUrlByDownloading(url)
180 if not re.match('[0-9a-f]{40}\n?$', data):
181 raise AssertionError('Bad SHA1 hash file: %r' % data)
185 def SyncURL(url, filename=None, stamp_dir=None, min_time=None,
186 hash_val=None, keep=False, verbose=False, stamp_index=0):
187 """Synchronize a destination file with a URL
189 if the URL does not match the URL stamp, then we must re-download it.
    url: the URL to compare against the stamp and to download from
193 filename: the file to create on download
194 path: the download path
    stamp_dir: the directory containing the stamp files to check against
196 hash_val: if set, the expected hash which must be matched
197 verbose: prints out status as it runs
198 stamp_index: index within the stamp file to check.
200 True if the file is replaced
201 False if the file is not replaced
203 HashError: if the hash does not match
206 assert url and filename
208 # If we are not keeping the tarball, or we already have it, we can
209 # skip downloading it for this reason. If we are keeping it,
212 tarball_ok = os.path.isfile(filename)
216 # If we don't need the tarball and the stamp_file matches the url, then
217 # we must be up to date. If the URL differs but the recorded hash matches
218 # the one we'll insist the tarball has, then that's good enough too.
219 # TODO(mcgrathr): Download the .sha1sum file first to compare with
220 # the cached hash, in case --file-hash options weren't used.
221 if tarball_ok and stamp_dir is not None:
222 if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time):
224 print '%s is already up to date.' % filename
226 if (hash_val is not None and
227 StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index)):
229 print '%s is identical to the up to date file.' % filename
232 if (os.path.isfile(filename)
233 and hash_val is not None
234 and hash_val == HashFile(filename)):
238 print 'Updating %s\n\tfrom %s.' % (filename, url)
239 EnsureFileCanBeWritten(filename)
240 http_download.HttpDownload(url, filename)
243 tar_hash = HashFile(filename)
244 if hash_val != tar_hash:
245 raise HashError(actual_hash=tar_hash, expected_hash=hash_val,