1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
import posixpath

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import All, Future
from path_util import AssertIsDirectory, IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize
class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories, never files.
  '''

  def __init__(self, file_system, object_store_creator):
    '''Wraps |file_system| and creates the stat, read and walk caches through
    |object_store_creator|.
    '''
    self._file_system = file_system

    def create_object_store(category, **optargs):
      # Every store's category is prefixed with the identity of the wrapped
      # file system.
      # NOTE(review): Create() is assumed to take the owning class as its
      # first argument - confirm against the object store creator.
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)

    self._stat_cache = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_cache = create_object_store('read', start_empty=False)
    self._walk_cache = create_object_store('walk', start_empty=False)

  def Refresh(self):
    '''Delegates to the wrapped file system's Refresh().
    '''
    return self._file_system.Refresh()

  def StatAsync(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns a Future of the stat. For a file path the Future raises
    FileNotFoundError when the parent directory's stat has no entry for the
    file.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    dir_path = ToDirectory(dir_path)

    def make_stat_info(dir_stat):
      '''Converts a dir stat into the correct resulting StatInfo; if the Stat
      was for a file, the StatInfo should just contain that file.
      '''
      if path == dir_path:
        return dir_stat
      # Was a file stat. Extract that file.
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                                (path, dir_path, dir_stat.child_versions))
      return StatInfo(file_version)

    dir_stat = self._stat_cache.Get(dir_path).Get()
    if dir_stat is not None:
      # Cache hit: defer the conversion so that a missing file surfaces as an
      # error from the Future rather than from this call.
      return Future(callback=lambda: make_stat_info(dir_stat))

    def cache_and_convert(dir_stat):
      assert dir_stat is not None  # should have raised a FileNotFoundError
      # We only ever need to cache the dir stat.
      self._stat_cache.Set(dir_path, dir_stat)
      return make_stat_info(dir_stat)
    return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(
        cache_and_convert)

  @memoize
  def _MemoizedStatAsyncFromFileSystem(self, dir_path):
    '''This is a simple wrapper to memoize Futures to directory stats, since
    StatAsync makes heavy use of it. Only cache directories so that the
    memoized cache doesn't blow up.
    '''
    assert IsDirectory(dir_path)
    return self._file_system.StatAsync(dir_path)

  def Read(self, paths, skip_not_found=False):
    '''Reads a list of files. If a file is cached and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file system.

    Returns a Future of a dict mapping each path to its data. When
    |skip_not_found| is set, a FileNotFoundError from a stat is turned into a
    None stat instead of propagating, and paths already recorded as missing in
    the read cache are not read again.
    '''
    # Files which aren't found are cached in the read object store as
    # path -> (None, None). This is to prevent re-reads of files we know
    # do not exist.
    cached_read_values = self._read_cache.GetMulti(paths).Get()
    cached_stat_values = self._stat_cache.GetMulti(paths).Get()

    # Populate a map of paths to Futures to their stat. They may have already
    # been cached in which case their Future will already have been constructed
    # with a value.
    stat_futures = {}

    def handle(error):
      # A missing file is represented by a None stat; anything else is a real
      # failure and is re-raised.
      if isinstance(error, FileNotFoundError):
        return None
      raise error

    for path in paths:
      stat_value = cached_stat_values.get(path)
      if stat_value is None:
        stat_future = self.StatAsync(path)
        if skip_not_found:
          stat_future = stat_future.Then(lambda x: x, handle)
      else:
        stat_future = Future(value=stat_value)
      stat_futures[path] = stat_future

    def is_up_to_date(path, version):
      # |version| == None implies a non-existent file, so skip it. The stat
      # itself is None when the file has disappeared and |skip_not_found|
      # swallowed the error; such an entry is stale rather than a crash.
      if version is None:
        return False
      stat_info = stat_futures[path].Get()
      return stat_info is not None and stat_info.version == version

    # Filter only the cached data which is up to date by comparing to the latest
    # stat. The cached read data includes the cached version. Remove it for
    # the result returned to callers.
    up_to_date_data = dict(
        (path, data) for path, (data, version) in cached_read_values.iteritems()
        if is_up_to_date(path, version))

    if skip_not_found:
      # Filter out paths which we know do not exist, i.e. if |path| is in
      # |cached_read_values| *and* has a None version, then it doesn't exist.
      # See the above declaration of |cached_read_values| for more information.
      paths = [path for path in paths
               if cached_read_values.get(path, (None, True))[1]]

    if len(up_to_date_data) == len(paths):
      # Everything was cached and up-to-date.
      return Future(value=up_to_date_data)

    def update_cache(new_results):
      # Update the cache. This is a path -> (data, version) mapping.
      self._read_cache.SetMulti(
          dict((path, (new_result, stat_futures[path].Get().version))
               for path, new_result in new_results.iteritems()))
      # Update the read cache to include files that weren't found, to prevent
      # constantly trying to read a file we now know doesn't exist.
      self._read_cache.SetMulti(
          dict((path, (None, None)) for path in paths
               if stat_futures[path].Get() is None))
      new_results.update(up_to_date_data)
      return new_results

    # Read in the values that were uncached or old.
    return self._file_system.Read(set(paths) - set(up_to_date_data),
                                  skip_not_found=skip_not_found).Then(
                                      update_cache)

  def GetCommitID(self):
    '''Delegates to the wrapped file system's GetCommitID().
    '''
    return self._file_system.GetCommitID()

  def GetPreviousCommitID(self):
    '''Delegates to the wrapped file system's GetPreviousCommitID().
    '''
    return self._file_system.GetPreviousCommitID()

  def Walk(self, root, depth=-1):
    '''Overrides FileSystem.Walk() to provide caching functionality.

    The traversal itself is done by the wrapped file system's Walk(); this
    method only supplies a |file_lister| that answers directory listings from
    the walk cache when the cached version matches the directory's stat.
    '''
    def file_lister(root):
      res, root_stat = All((self._walk_cache.Get(root),
                            self.StatAsync(root))).Get()

      if res and res[2] == root_stat.version:
        dirs, files = res[0], res[1]
      else:
        # Wasn't cached, or not up to date.
        dirs, files = [], []
        for f in self.ReadSingle(root).Get():
          if IsDirectory(f):
            dirs.append(f)
          else:
            files.append(f)
        # Update the cache. This is a root -> (dirs, files, version) mapping.
        self._walk_cache.Set(root, (dirs, files, root_stat.version))
      return dirs, files
    return self._file_system.Walk(root, depth=depth, file_lister=file_lister)

  def GetIdentity(self):
    '''Delegates to the wrapped file system's GetIdentity().
    '''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))