1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
import posixpath

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import Future
from path_util import IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize


class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories, never files.

  Two object stores back the cache, both created through
  |object_store_creator| under a category namespaced by the identity of the
  wrapped file system:
    - 'stat' maps a directory path to the stat of that directory.
    - 'read' maps a path to a (data, version) tuple.
  '''
  # NOTE(review): this class was restored from a listing that had lost its
  # indentation and its shortest lines. The following pieces were
  # reconstructed from context and should be confirmed against version
  # control: the first positional argument and the **optargs forwarding in
  # create_object_store; the Refresh/Stat/__repr__ method headers; the
  # directory short-circuit in make_stat_info; the @memoize decorator; and
  # the body of |handle| in Read.

  def __init__(self, file_system, object_store_creator):
    '''Wraps |file_system|, creating the stat and read caches with
    |object_store_creator|.
    '''
    self._file_system = file_system

    def create_object_store(category, **optargs):
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)

    self._stat_object_store = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_object_store = create_object_store('read', start_empty=False)

  def Refresh(self):
    '''Delegates to Refresh() on the wrapped file system.
    '''
    return self._file_system.Refresh()

  def Stat(self, path):
    '''Blocking form of StatAsync(): resolves the Future and returns its value.
    '''
    return self.StatAsync(path).Get()

  def StatAsync(self, path):
    '''Stats the directory given, or if a file is given, stats the file's
    parent directory to get info about the file.

    Returns a Future. When |path| is itself the directory being stat'ed, the
    Future resolves to that directory's stat; otherwise it resolves to a
    StatInfo holding only the version of the requested file. Resolving the
    Future raises FileNotFoundError if the directory stat has no entry for
    the file.
    '''
    # Always stat the parent directory, since it will have the stat of the
    # child anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    dir_path = ToDirectory(dir_path)

    def make_stat_info(dir_stat):
      '''Converts a dir stat into the correct resulting StatInfo; if the Stat
      was for a file, the StatInfo should just contain that file.
      '''
      if path == dir_path:
        return dir_stat
      # Was a file stat. Extract that file.
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                                (path, dir_path, dir_stat.child_versions))
      return StatInfo(file_version)

    cached_dir_stat = self._stat_object_store.Get(dir_path).Get()
    if cached_dir_stat is not None:
      # Convert through Then() rather than eagerly, so that a missing file is
      # reported when the Future is resolved - the same way the uncached path
      # below reports it - and callers which attach an error handler to the
      # returned Future (see Read) get a chance to run it.
      # NOTE(review): assumes Future.Then() defers its callback until Get();
      # if it runs eagerly this behaves exactly as the direct call did.
      return Future(value=cached_dir_stat).Then(make_stat_info)

    def cache_and_convert(dir_stat):
      assert dir_stat is not None  # should have raised a FileNotFoundError
      # We only ever need to cache the dir stat.
      self._stat_object_store.Set(dir_path, dir_stat)
      return make_stat_info(dir_stat)

    return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(
        cache_and_convert)

  @memoize
  def _MemoizedStatAsyncFromFileSystem(self, dir_path):
    '''This is a simple wrapper to memoize Futures to directory stats, since
    StatAsync makes heavy use of it. Only cache directories so that the
    memoized cache doesn't blow up.
    '''
    assert IsDirectory(dir_path)
    return self._file_system.StatAsync(dir_path)

  def Read(self, paths, skip_not_found=False):
    '''Reads a list of files. If a file is in memcache and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file
    system.

    Returns a Future resolving to a mapping of path to data. |skip_not_found|
    is forwarded to the wrapped file system's Read(); when it is set, a
    FileNotFoundError raised while stat'ing a path is swallowed and that path
    is treated as having no up-to-date cached data.
    '''
    cached_read_values = self._read_object_store.GetMulti(paths).Get()
    cached_stat_values = self._stat_object_store.GetMulti(paths).Get()

    def handle(error):
      # A missing file resolves to a None stat; anything else is re-raised.
      if isinstance(error, FileNotFoundError):
        return None
      raise error

    # Populate a map of paths to Futures to their stat. They may have already
    # been cached in which case their Future will already have been
    # constructed with a value.
    stat_futures = {}
    for path in paths:
      stat_value = cached_stat_values.get(path)
      if stat_value is not None:
        stat_future = Future(value=stat_value)
      else:
        stat_future = self.StatAsync(path)
        if skip_not_found:
          stat_future = stat_future.Then(lambda x: x, handle)
      stat_futures[path] = stat_future

    def is_fresh(path, cached_version):
      # The stat is None when the file was not found and |skip_not_found| is
      # set; such a path can never be served from the cache.
      stat = stat_futures[path].Get()
      return stat is not None and stat.version == cached_version

    # Filter only the cached data which is fresh by comparing to the latest
    # stat. The cached read data includes the cached version. Remove it for
    # the result returned to callers.
    fresh_data = dict(
        (path, data)
        for path, (data, version) in cached_read_values.iteritems()
        if is_fresh(path, version))

    if len(fresh_data) == len(paths):
      # Everything was cached and up-to-date.
      return Future(value=fresh_data)

    def update_cache(new_results):
      # Update the cache. This is a path -> (data, version) mapping. Results
      # whose stat is unknown carry no version to cache against, so they are
      # returned to the caller but not stored.
      to_cache = {}
      for path, new_result in new_results.iteritems():
        stat = stat_futures[path].Get()
        if stat is not None:
          to_cache[path] = (new_result, stat.version)
      self._read_object_store.SetMulti(to_cache)
      new_results.update(fresh_data)
      return new_results

    # Read in the values that were uncached or old.
    stale_paths = set(paths) - set(fresh_data.iterkeys())
    return self._file_system.Read(
        stale_paths, skip_not_found=skip_not_found).Then(update_cache)

  def GetIdentity(self):
    '''Returns the identity of the wrapped file system.
    '''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))