1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
import logging
import posixpath

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import All, Future
from path_util import AssertIsDirectory, IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize
class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. If
  |fail_on_miss| is True then cache misses throw a FileNotFoundError rather
  than falling back onto the underlying FileSystem.

  If the underlying FileSystem is versioned (i.e., it implements GetVersion to
  return something other than None), this will create a persistent stat cache
  (keyed on the FileSystem instance's version) as an additional optimization.
  '''
  def __init__(self, file_system, object_store_creator, fail_on_miss=False):
    self._file_system = file_system
    self._fail_on_miss = fail_on_miss
    def create_object_store(category, try_versioning=False, **optargs):
      '''Creates an object store tagged with |category| for this FileSystem.
      If |try_versioning| is True and the underlying FileSystem is versioned,
      the store's identity includes the version so it persists across
      instances; otherwise the store starts empty by default so stale entries
      are force-refreshed.
      '''
      version = file_system.GetVersion()
      versioned = try_versioning and version is not None
      if versioned:
        identity = '%s/%s' % (file_system.GetIdentity(), version)
      else:
        identity = file_system.GetIdentity()
      # Unversioned stores must start empty unless the caller says otherwise.
      optargs['start_empty'] = optargs.get('start_empty', not versioned)
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (identity, category),
          **optargs)
    self._stat_cache = create_object_store('stat', try_versioning=True)
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_cache = create_object_store('read', start_empty=False)
    self._walk_cache = create_object_store('walk', start_empty=False)

  def Refresh(self):
    return self._file_system.Refresh()

  def StatAsync(self, path):
    '''Stats the directory given, or if a file is given, stats the file's
    parent directory to get info about the file.

    Returns a Future to a StatInfo. Raises FileNotFoundError (from the Future)
    if the path does not exist, or on a cache miss when |fail_on_miss| is set.
    '''
    # Always stat the parent directory, since it will have the stat of the
    # child anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    dir_path = ToDirectory(dir_path)

    def make_stat_info(dir_stat):
      '''Converts a dir stat into the correct resulting StatInfo; if the Stat
      was for a file, the StatInfo should just contain that file.
      '''
      if path == dir_path:
        return dir_stat
      # Was a file stat. Extract that file.
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                                (path, dir_path, dir_stat.child_versions))
      return StatInfo(file_version)

    def raise_cache_miss(path):
      raise FileNotFoundError('Got cache miss when trying to stat %s' % path)

    dir_stat = self._stat_cache.Get(dir_path).Get()
    if dir_stat is not None:
      return Future(callback=lambda: make_stat_info(dir_stat))

    if self._fail_on_miss:
      logging.warning('Bailing on stat cache miss for %s' % dir_path)
      return Future(callback=lambda: raise_cache_miss(dir_path))

    def next(dir_stat):
      assert dir_stat is not None  # should have raised a FileNotFoundError
      # We only ever need to cache the dir stat.
      self._stat_cache.Set(dir_path, dir_stat)
      return make_stat_info(dir_stat)
    return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next)

  @memoize
  def _MemoizedStatAsyncFromFileSystem(self, dir_path):
    '''This is a simple wrapper to memoize Futures to directory stats, since
    StatAsync makes heavy use of it. Only cache directories so that the
    memoized cache doesn't blow up.
    '''
    assert IsDirectory(dir_path)
    return self._file_system.StatAsync(dir_path)

  def Read(self, paths, skip_not_found=False):
    '''Reads a list of files. If a file is cached and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file
    system.

    Returns a Future to a dict mapping each path to its data. If
    |skip_not_found| is True, missing files are silently omitted from the
    result instead of raising FileNotFoundError.
    '''
    # Files which aren't found are cached in the read object store as
    # (path, None, None). This is to prevent re-reads of files we know
    # do not exist.
    cached_read_values = self._read_cache.GetMulti(paths).Get()
    cached_stat_values = self._stat_cache.GetMulti(paths).Get()

    # Populate a map of paths to Futures to their stat. They may have already
    # been cached in which case their Future will already have been
    # constructed with that value.
    stat_futures = {}

    def handle(error):
      # Swallow not-found errors when the caller asked to skip missing files;
      # the resulting None stat marks the path as non-existent.
      if isinstance(error, FileNotFoundError):
        return None
      raise error

    for path in paths:
      stat_value = cached_stat_values.get(path)
      if stat_value is None:
        stat_future = self.StatAsync(path)
        if skip_not_found:
          stat_future = stat_future.Then(lambda x: x, handle)
      else:
        stat_future = Future(value=stat_value)
      stat_futures[path] = stat_future

    # Filter only the cached data which is up to date by comparing to the
    # latest stat. The cached read data includes the cached version. Remove it
    # for the result returned to callers. |version| == None implies a
    # non-existent file, so skip it.
    up_to_date_data = dict(
        (path, data) for path, (data, version) in cached_read_values.iteritems()
        if version is not None and stat_futures[path].Get().version == version)

    if skip_not_found:
      # Filter out paths which we know do not exist, i.e. if |path| is in
      # |cached_read_values| *and* has a None version, then it doesn't exist.
      # See the above declaration of |cached_read_values| for more
      # information.
      paths = [path for path in paths
               if cached_read_values.get(path, (None, True))[1]]

    if len(up_to_date_data) == len(paths):
      # Everything was cached and up-to-date.
      return Future(value=up_to_date_data)

    def next(new_results):
      # Update the cache. This is a path -> (data, version) mapping.
      self._read_cache.SetMulti(
          dict((path, (new_result, stat_futures[path].Get().version))
               for path, new_result in new_results.iteritems()))
      # Update the read cache to include files that weren't found, to prevent
      # constantly trying to read a file we now know doesn't exist.
      self._read_cache.SetMulti(
          dict((path, (None, None)) for path in paths
               if stat_futures[path].Get() is None))
      new_results.update(up_to_date_data)
      return new_results
    # Read in the values that were uncached or old.
    return self._file_system.Read(set(paths) - set(up_to_date_data.iterkeys()),
                                  skip_not_found=skip_not_found).Then(next)

  def GetCommitID(self):
    return self._file_system.GetCommitID()

  def GetPreviousCommitID(self):
    return self._file_system.GetPreviousCommitID()

  def Walk(self, root, depth=-1):
    '''Overrides FileSystem.Walk() to provide caching functionality.
    '''
    def file_lister(root):
      res, root_stat = All((self._walk_cache.Get(root),
                            self.StatAsync(root))).Get()

      if res and res[2] == root_stat.version:
        # Cached listing is current for this directory version.
        dirs, files = res[0], res[1]
      else:
        # Wasn't cached, or not up to date.
        dirs, files = [], []
        for f in self.ReadSingle(root).Get():
          if IsDirectory(f):
            dirs.append(f)
          else:
            files.append(f)
        # Update the cache. This is a root -> (dirs, files, version) mapping.
        self._walk_cache.Set(root, (dirs, files, root_stat.version))
      return dirs, files
    return self._file_system.Walk(root, depth=depth, file_lister=file_lister)

  def GetIdentity(self):
    return self._file_system.GetIdentity()

  def GetVersion(self):
    return self._file_system.GetVersion()

  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))