1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Downloads files upon request in a thread/process safe way.
7 DEPRECATED: Should be merged into chromite.lib.cache.
from __future__ import print_function

import hashlib
import os
import shutil
import stat
import tempfile
import time

# NOTE(review): the import that binds `fixup_path` is not visible in this
# excerpt; restore it above this call. The call is kept ahead of the chromite
# imports, matching the original statement order.
fixup_path.FixupPath()

from chromite.lib.paygen import flock
from chromite.lib.paygen import urilib
from chromite.lib.paygen import utils
# Maximum number of attempts GetFileObject makes for a single URI before it
# raises RetriesExhaustedError.
FETCH_RETRY_COUNT = 10

# NOTE(review): not referenced anywhere in the visible part of this module.
DEFAULT_DAYS_TO_KEEP = 1

# Number of seconds in one day; the default max_age of DownloadCache.
ONE_DAY = 24 * 60 * 60
class RetriesExhaustedError(Exception):
    """Raised when we make too many attempts to download the same file.

    GetFileObject raises this, with the URI as its only argument, once
    FETCH_RETRY_COUNT attempts have failed to produce the cache file.
    """
36 class DownloadCache(object):
37 """This class downloads files into a local directory upon request.
39 This class uses locking to make this safe across processes, and
44 # This will create the cache dir, and purge old contents.
45 cache = DownloadCache('/tmp/my_cache')
47 # file is copied into file, blocking for download if needed.
48 cache.GetFileCopy('gs://bucket/foo', '/tmp/foo')
50 # file is loaded into cache, but not locked.
51 tempfile = cache.GetFileInTempFile('gs://bucket/foo')
55 # Name of the purge management lock over the entire cache.
56 _CACHE_LOCK = 'cache.lock'
60 _GET_FILE_SPIN_DELAY = 2
62 def __init__(self, cache_dir, max_age=ONE_DAY, cache_size=None):
63 """Create a DownloadCache.
65 Since Purging is not performed very often, we can exceed max_age or
69 cache_dir: The directory in which to create the cache.
70 max_age: Purge files not used for this number of seconds. None for no
72 cache_size: Purge the least recently used files until the cache is
73 below this size in bytes. None for no size limit.
75 If no condition is provided, we purge all files unused for one full day.
77 # One directory for cached files, one for lock files.
78 self._cache_dir = os.path.realpath(cache_dir)
79 self._file_dir = os.path.join(self._cache_dir, self._FILE_DIR)
80 self._lock_dir = os.path.join(self._cache_dir, self._LOCK_DIR)
82 self._max_age = max_age
83 self._cache_size = cache_size
87 def _SetupCache(self):
88 """Make sure that our cache contains only files/directories we expect."""
90 # The purge lock ensures nobody else is modifying the cache in any way.
91 with self._PurgeLock(blocking=False, shared=False):
92 # We have changed the layout of our cache directories over time.
93 # Clean up any left over files.
94 expected = (self._CACHE_LOCK, self._FILE_DIR, self._LOCK_DIR)
95 unexpected = set(os.listdir(self._cache_dir)).difference(expected)
97 for name in unexpected:
98 filename = os.path.join(self._cache_dir, name)
99 if os.path.isdir(filename):
100 shutil.rmtree(filename)
104 # Create the cache file dir if needed.
105 if not os.path.exists(self._file_dir):
106 os.makedirs(self._file_dir)
108 # Create the lock dir if needed.
109 if not os.path.exists(self._lock_dir):
110 os.makedirs(self._lock_dir)
111 except flock.LockNotAcquired:
112 # If we can't get an exclusive lock on the cache, someone else set it up.
115 def _UriToCacheFile(self, uri):
116 """Convert a URI to an cache file (full path).
119 uri: The uri of the file to be cached locally.
122 The full path file name of the cache file associated with a given URI.
124 # We use the md5 hash of the URI as our file name. This allows us to
125 # store all cache files in a single directory, which removes race
126 # conditions around directories.
128 return os.path.join(self._file_dir, m.digest().encode('hex'))
def _PurgeLock(self, blocking=False, shared=False):
    """Acquire a lock on the cache as a whole.

    An exclusive lock proves nobody else will modify anything, and nobody
    else will hold any _CacheFileLocks. A shared lock is required before
    getting any kind of _CacheFileLock.

    Args:
        blocking: Block until the lock is available?
        shared: Get a shared lock, or an exclusive lock?

    Returns:
        flock.Lock (not acquired)
    """
    # NOTE(review): only the lock_name and lock_dir arguments are visible in
    # this excerpt; forwarding blocking/shared by keyword is assumed from
    # this method's parameters -- confirm against flock.Lock's signature.
    return flock.Lock(lock_name=self._CACHE_LOCK,
                      lock_dir=self._cache_dir,
                      blocking=blocking,
                      shared=shared)
def _CacheFileLock(self, cache_file, blocking=False, shared=False):
    """Acquire a lock on a file in the cache.

    A shared lock will ensure no other processes are modifying the file, but
    getting it does not ensure that the file in question actually exists.

    An exclusive lock is required to modify a cache file.

    A shared _PurgeLock should be held before trying to acquire any type
    of cache file lock.

    Args:
        cache_file: The full path of file in cache to lock.
        blocking: Block until the lock is available?
        shared: Get a shared lock, or an exclusive lock?

    Returns:
        flock.Lock (not acquired)
    """
    # The lock is named after the cache file and lives in the lock dir.
    #
    # NOTE(review): only the lock_name and lock_dir arguments are visible in
    # this excerpt; forwarding blocking/shared by keyword is assumed from
    # this method's parameters -- confirm against flock.Lock's signature.
    return flock.Lock(lock_name=os.path.basename(cache_file),
                      lock_dir=self._lock_dir,
                      blocking=blocking,
                      shared=shared)
def Purge(self, max_age=None, cache_size=None):
    """Attempts to clean up the cache contents.

    Is a no-op if cache lock is not acquirable.

    Args:
        max_age: Overrides the __init__ max_age for this one
            purge. Mostly intended for unittests.
        cache_size: Overrides the __init__ cache_size for this one
            purge. Mostly intended for unittests.
    """
    max_age = self._max_age if max_age is None else max_age
    cache_size = self._cache_size if cache_size is None else cache_size

    # NOTE(review): the statements that bind `now`, delete files and leave
    # the size loop are not visible in this excerpt; they are reconstructed
    # from the surrounding comments and conditions -- confirm.
    try:
        # Prevent other changes while we purge the cache.
        with self._PurgeLock(shared=False, blocking=False):
            # Purge files based on age, if specified.
            if max_age is not None:
                now = time.time()
                for f in utils.ListdirFullpath(self._file_dir):
                    if (now - os.path.getmtime(f)) > max_age:
                        os.unlink(f)

            # Purge files based on size, if specified.
            if cache_size is not None:
                # Find cache files, and sort them so the oldest are first.
                # This defines which ones we will purge first.
                cache_files = utils.ListdirFullpath(self._file_dir)
                cache_files.sort(key=os.path.getmtime)

                sizes = [os.path.getsize(f) for f in cache_files]
                total_size = sum(sizes)

                # Remove files until we are small enough to fit.
                for f, size in zip(cache_files, sizes):
                    if total_size < cache_size:
                        break
                    os.unlink(f)
                    total_size -= size

            # Just remove all lock files. They will be recreated as needed.
            shutil.rmtree(self._lock_dir)
            os.makedirs(self._lock_dir)
    except flock.LockNotAcquired:
        # If we can't get an exclusive lock on the file, it's in use, leave it.
        pass
def _FetchIntoCache(self, uri, cache_file):
    """This function downloads the specified file (if not already local).

    You must hold the PurgeLock when calling this method.

    If it can't get an exclusive lock, or if the file is already present,
    nothing is downloaded.

    Args:
        uri: uri of the file to download.
        cache_file: location in the cache to download to.

    Returns:
        True if a file was downloaded, False otherwise. (used in unittests)

    Raises:
        May raise any download error associated with the URI's protocol.
    """
    # NOTE(review): the try/except scaffolding, the return statements and the
    # purge call are not visible in this excerpt; they are reconstructed from
    # the docstring and the surrounding comments -- confirm.
    try:
        # Write protect the file before modifying it.
        with self._CacheFileLock(cache_file, shared=False, blocking=False):
            if os.path.exists(cache_file):
                return False

            try:
                # Actually download the file.
                urilib.Copy(uri, cache_file)
                # Make the file read-only by everyone.
                os.chmod(cache_file,
                         stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
            except BaseException:
                # If there was any error with the download, make sure no
                # partial file was left behind. The error is re-raised
                # untouched.
                if os.path.exists(cache_file):
                    os.unlink(cache_file)
                raise
    except flock.LockNotAcquired:
        # In theory, if it's already locked, that either means a download is
        # in progress, or there is a shared lock which means it's already
        # present.
        return False

    # Try to cleanup the cache after we just grew it.
    self.Purge()
    return True
def GetFileObject(self, uri):
    """Get an open readonly File object for the file in the cache.

    This method will populate the cache with the requested file if it's
    not already present, and will return an already opened read only file
    object for the cache contents.

    Even if the file is purged, this File object will remain valid until
    closed. Since this method is the only legitimate way to get access to
    a file in the cache, and it returns read only Files, cache files should
    not be modified by callers.

    This method may block while trying to download and/or lock the file.

    Args:
        uri: The uri of the file to access.

    Returns:
        File object opened with 'rb' mode.

    Raises:
        Exceptions from a failed download are passed through 'as is' from
        the underlying download mechanism.

        RetriesExhaustedError if we need a large number of attempts to
        download the same file.
    """
    cache_file = self._UriToCacheFile(uri)

    # We keep trying until we succeed, or throw an exception.
    # range() rather than xrange() so the loop also runs on Python 3.
    for _ in range(FETCH_RETRY_COUNT):
        with self._PurgeLock(shared=True, blocking=True):
            # Attempt to download the file, if needed.
            self._FetchIntoCache(uri, cache_file)

            # Get a shared lock on the file. This can block if another process
            # has a non-shared lock (ie: they are downloading)
            with self._CacheFileLock(cache_file, shared=True, blocking=True):
                if os.path.exists(cache_file):
                    fd = open(cache_file, 'rb')
                    try:
                        # Touch the timestamp on cache file to help purging
                        # logic.
                        os.utime(cache_file, None)
                    except Exception:
                        # Don't leak the open handle if the touch fails.
                        fd.close()
                        raise
                    # NOTE(review): the return is not visible in this excerpt;
                    # it is implied by the documented return value.
                    return fd

                # We don't have the file in our cache. There are three ways
                # this could happen:
                #
                # A) Another process was trying to download, blocked our
                #    download, then got a download error.
                # B) Another process removed the file (illegally). We will
                #    recover as soon as all read-only locks are released.
                # C) Our download failed without throwing an exception. If
                #    this keeps happening we give up after FETCH_RETRY_COUNT
                #    attempts.

        # Sleep so we don't spin too quickly, then try again.
        # NOTE(review): the sleep is assumed to run after both locks are
        # released; the original nesting is not visible -- confirm.
        time.sleep(self._GET_FILE_SPIN_DELAY)

    raise RetriesExhaustedError(uri)
331 def GetFileCopy(self, uri, filepath):
332 """Copy a cache file into your file (downloading as needed).
334 Copy the file into your specified filename (creating or overridding). It
335 will be downloaded into the cache first, if needed. It is your
336 responsibility to manage filepath after it is populated.
339 uri: The uri of the file to access.
340 filepath: The name of the file to copy uri contents into.
343 Exceptions from a failed download are passed through 'as is' from
344 the underlying download mechanism.
346 with self.GetFileObject(uri) as src:
347 with open(filepath, 'wb+') as dest:
348 shutil.copyfileobj(src, dest)
350 def GetFileInTempFile(self, uri):
351 """Copy a cache file into a tempfile (downloading as needed).
353 The cache file is copied into a tempfile.NamedTemporaryFile.
355 This file is owned strictly by the caller and can be modified/deleted as
356 needed. Closing the NamedTemporaryFile will delete it.
359 uri: The uri of the file to access.
362 tempfile.NamedTemporaryFile containing the requested file.
363 NamedTemporaryFile.name will contain the file's name.
366 Exceptions from a failed download are passed through 'as is' from
367 the underlying download mechanism.
369 temp = tempfile.NamedTemporaryFile()
370 self.GetFileCopy(uri, temp.name)
373 # Cache objects can be used with "with" statements.
377 def __exit__(self, _type, _value, _traceback):