1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
import logging
import posixpath
import traceback

from app_yaml_helper import AppYamlHelper
from appengine_wrappers import (
    GetAppVersion, IsDeadlineExceededError, logservice)
from branch_utility import BranchUtility
from compiled_file_system import CompiledFileSystem
from data_source_registry import CreateDataSources
from environment import IsDevServer
from extensions_paths import EXAMPLES, PUBLIC_TEMPLATES, STATIC_DOCS
from file_system_util import CreateURLsFromPaths
from future import Future
from gcs_file_system_provider import CloudStorageFileSystemProvider
from github_file_system_provider import GithubFileSystemProvider
from host_file_system_provider import HostFileSystemProvider
from object_store_creator import ObjectStoreCreator
from render_servlet import RenderServlet
from server_instance import ServerInstance
from servlet import Servlet, Request, Response
from special_paths import SITE_VERIFICATION_FILE
from timer import Timer, TimerClosure

class _SingletonRenderServletDelegate(RenderServlet.Delegate):
  '''RenderServlet delegate which always hands back the ServerInstance it was
  constructed with.
  '''

  def __init__(self, server_instance):
    # Returned unchanged from every CreateServerInstance() call.
    self._server_instance = server_instance

  def CreateServerInstance(self):
    '''Returns the ServerInstance passed to the constructor.
    '''
    return self._server_instance

class _CronLogger(object):
  '''Wraps the logging.* methods to prefix them with 'cron' and flush
  immediately. The flushing is important because often these cron runs time
  out and we lose the logs.
  '''

  def info(self, msg, *args):
    '''Logs |msg|, %-formatted with |args|, via logging.info.
    '''
    self._log(logging.info, msg, args)

  def warning(self, msg, *args):
    '''Logs |msg|, %-formatted with |args|, via logging.warning.
    '''
    self._log(logging.warning, msg, args)

  def error(self, msg, *args):
    '''Logs |msg|, %-formatted with |args|, via logging.error.
    '''
    self._log(logging.error, msg, args)

  def _log(self, logfn, msg, args):
    '''Calls |logfn| with |msg| prefixed by 'cron: ' and |args| as the lazy
    format arguments, then flushes the log service.
    '''
    try:
      logfn('cron: %s' % msg, *args)
    finally:
      # Flush even if |logfn| raised; the whole point of this class is that
      # nothing written so far is lost if the process is killed.
      logservice.flush()


# Module-wide logger; everything in this file logs through it.
_cronlog = _CronLogger()

def _RequestEachItem(title, items, request_callback):
  '''Runs a task |request_callback| named |title| for each item in |items|.
  |request_callback| must take an item and return a servlet response.
  Returns true if every item was successfully run, false if any return a
  non-200 response or raise an exception.

  |items| must support len(). An exception for which IsDeadlineExceededError
  returns true is logged, counted as a failure, and then re-raised; any other
  Exception is logged and counted as a failure, and the run continues.
  '''
  _cronlog.info('%s: starting', title)
  success_count, failure_count = 0, 0
  total = len(items)
  timer = Timer()

  def error_message(index, item, detail):
    return '%s: error rendering %s (%s of %s): %s' % (
        title, item, index + 1, total, detail)

  try:
    for i, item in enumerate(items):
      try:
        response = request_callback(item)
        if response.status == 200:
          success_count += 1
        else:
          failure_count += 1
          _cronlog.error(error_message(
              i, item, 'response status %s' % response.status))
      except Exception as e:
        failure_count += 1
        _cronlog.error(error_message(i, item, traceback.format_exc()))
        if IsDeadlineExceededError(e): raise
  finally:
    # Log the summary even when a deadline error is propagating, so that a
    # timed-out run still reports how far it got.
    _cronlog.info('%s: rendered %s of %s with %s failures in %s',
                  title, success_count, total, failure_count,
                  timer.Stop().FormatElapsed())
  return success_count == total

class CronServlet(Servlet):
  '''Servlet which runs a cron job.

  Get() runs the Cron() method of every Cron target (the data sources from
  CreateDataSources plus the server instance's content_providers and
  api_models) against a ServerInstance pinned to a concrete revision, and
  reports overall success or failure as the servlet response.
  '''

  def __init__(self, request, delegate_for_test=None):
    '''|delegate_for_test| replaces the default CronServlet.Delegate when
    given.
    '''
    Servlet.__init__(self, request)
    self._delegate = delegate_for_test or CronServlet.Delegate()

  class Delegate(object):
    '''CronServlet's runtime dependencies. Override for testing.
    '''

    def CreateBranchUtility(self, object_store_creator):
      return BranchUtility.Create(object_store_creator)

    def CreateHostFileSystemProvider(self,
                                     object_store_creator,
                                     max_trunk_revision=None):
      return HostFileSystemProvider(object_store_creator,
                                    max_trunk_revision=max_trunk_revision)

    def CreateGithubFileSystemProvider(self, object_store_creator):
      return GithubFileSystemProvider(object_store_creator)

    def CreateGCSFileSystemProvider(self, object_store_creator):
      return CloudStorageFileSystemProvider(object_store_creator)

    def GetAppVersion(self):
      return GetAppVersion()

  def Get(self):
    '''Runs the cron job and returns its servlet response.

    Any exception escaping the run is logged and reported as an internal
    error response rather than propagated.
    '''
    # Crons often time out, and if they do we need to make sure to flush the
    # logs before the process gets killed (Python gives us only a short grace
    # period before that happens).
    #
    # So, manually flush logs at the end of the cron run. However, sometimes
    # even that isn't enough, which is why in this file we use _cronlog and
    # make it flush the log every time it's used.
    logservice.AUTOFLUSH_ENABLED = False
    try:
      return self._GetImpl()
    except BaseException:
      _cronlog.error('Caught top-level exception! %s', traceback.format_exc())
      # Always hand the caller a response rather than falling through to None.
      return Response.InternalError('Failure')
    finally:
      logservice.flush()

  def _GetImpl(self):
    '''Runs every Cron target and returns Response.Ok if all of them
    succeeded, Response.InternalError otherwise. Unexpected exceptions are
    logged and re-raised.
    '''
    # Cron strategy:
    #
    # Find all public template files and static files, and render them. Most of
    # the time these won't have changed since the last cron run, so it's a
    # little wasteful, but hopefully rendering is really fast (if it isn't we
    # have a problem).
    _cronlog.info('starting')

    # This is returned every time RenderServlet wants to create a new
    # ServerInstance.
    #
    # TODO(kalman): IMPORTANT. This sometimes throws an exception, breaking
    # everything. Need retry logic at the fetcher level.
    server_instance = self._GetSafeServerInstance()
    trunk_fs = server_instance.host_file_system_provider.GetTrunk()

    def render(path):
      '''Renders |path| through a RenderServlet bound to |server_instance|,
      reusing this request's host and headers.
      '''
      request = Request(path, self._request.host, self._request.headers)
      delegate = _SingletonRenderServletDelegate(server_instance)
      return RenderServlet(request, delegate).Get()

    def request_files_in_dir(path, prefix='', strip_ext=None):
      '''Requests every file found under |path| in this host file system, with
      a request prefix of |prefix|. |strip_ext| is an optional list of file
      extensions that should be stripped from paths before requesting.
      '''
      def maybe_strip_ext(name):
        if name == SITE_VERIFICATION_FILE or not strip_ext:
          return name
        base, ext = posixpath.splitext(name)
        return base if ext in strip_ext else name
      files = [maybe_strip_ext(name)
               for name, _ in CreateURLsFromPaths(trunk_fs, path, prefix)]
      return _RequestEachItem(path, files, render)

    # One boolean per failed (or explicitly checked) step; an empty list means
    # that nothing went wrong.
    results = []

    try:
      # Start running the hand-written Cron methods first; they can be run in
      # parallel. They are resolved at the end.
      def run_cron_for_future(target):
        title = target.__class__.__name__
        future, init_timer = TimerClosure(target.Cron)
        assert isinstance(future, Future), (
            '%s.Cron() did not return a Future' % title)
        def resolve():
          resolve_timer = Timer()
          try:
            future.Get()
          except Exception as e:
            _cronlog.error('%s: error %s', title, traceback.format_exc())
            results.append(False)
            if IsDeadlineExceededError(e): raise
          finally:
            resolve_timer.Stop()
            _cronlog.info('%s took %s: %s to initialize and %s to resolve',
                          title,
                          init_timer.With(resolve_timer).FormatElapsed(),
                          init_timer.FormatElapsed(),
                          resolve_timer.FormatElapsed())
        return Future(callback=resolve)

      # list() keeps the concatenation valid where values() is not a list.
      targets = (list(CreateDataSources(server_instance).values()) +
                 [server_instance.content_providers,
                  server_instance.api_models])
      title = 'initializing %s parallel Cron targets' % len(targets)
      _cronlog.info(title)
      timer = Timer()
      try:
        cron_futures = [run_cron_for_future(target) for target in targets]
      finally:
        _cronlog.info('%s took %s', title, timer.Stop().FormatElapsed())

      # Samples are too expensive to run on the dev server, where there is no
      # parallel fetch.
      #
      # XXX(kalman): Currently samples are *always* too expensive to fetch, so
      # disabling them for now. It won't break anything so long as we're still
      # not enforcing that everything gets cached for normal instances.
      if False:  # should be "not IsDevServer()":
        # Fetch each individual sample file.
        results.append(request_files_in_dir(EXAMPLES,
                                            prefix='extensions/examples'))

      # Resolve the hand-written Cron method futures.
      title = 'resolving %s parallel Cron targets' % len(targets)
      _cronlog.info(title)
      timer = Timer()
      try:
        for future in cron_futures:
          future.Get()
      finally:
        _cronlog.info('%s took %s', title, timer.Stop().FormatElapsed())

    except BaseException:
      results.append(False)
      # This should never actually happen (each cron step does its own
      # conservative error checking), so re-raise no matter what it is.
      _cronlog.error('uncaught error: %s', traceback.format_exc())
      raise
    finally:
      # Deliberately no `return` here: returning from `finally` would discard
      # the exception re-raised above.
      success = all(results)
      _cronlog.info('finished (%s)', 'success' if success else 'FAILED')

    return (Response.Ok('Success') if success else
            Response.InternalError('Failure'))

  def _GetSafeServerInstance(self):
    '''Returns a ServerInstance with a host file system at a safe revision,
    meaning the most recent revision whose app.yaml version is not newer than
    the version of the currently running server.
    '''
    app_version = self._delegate.GetAppVersion()

    # IMPORTANT: Get a ServerInstance pinned to the most recent revision, not
    # HEAD. These cron jobs take a while and run very frequently such that
    # there is usually one running at any given time, and eventually a file
    # that we're dealing with will change underneath it, putting the server in
    # an undefined state.
    server_instance_near_head = self._CreateServerInstance(
        self._GetMostRecentRevision())

    app_yaml_handler = AppYamlHelper(
        server_instance_near_head.object_store_creator,
        server_instance_near_head.host_file_system_provider)

    if app_yaml_handler.IsUpToDate(app_version):
      return server_instance_near_head

    # The version in app.yaml is greater than the currently running app's.
    # The safe version is the one before it changed.
    safe_revision = app_yaml_handler.GetFirstRevisionGreaterThan(
        app_version) - 1

    _cronlog.info('app version %s is out of date, safe is %s',
                  app_version, safe_revision)

    return self._CreateServerInstance(safe_revision)

  def _GetMostRecentRevision(self):
    '''Gets the revision of the most recent patch submitted to the host file
    system. This is similar to HEAD but it's a concrete revision so won't
    change as the cron runs.
    '''
    head_fs = (
        self._CreateServerInstance(None).host_file_system_provider.GetTrunk())
    return head_fs.Stat('').version

  def _CreateServerInstance(self, revision):
    '''Creates a ServerInstance pinned to |revision|, or HEAD if None.
    NOTE: If passed None it's likely that during the cron run patches will be
    submitted at HEAD, which may change data underneath the cron run.
    '''
    object_store_creator = ObjectStoreCreator(start_empty=True)
    branch_utility = self._delegate.CreateBranchUtility(object_store_creator)
    host_file_system_provider = self._delegate.CreateHostFileSystemProvider(
        object_store_creator, max_trunk_revision=revision)
    github_file_system_provider = self._delegate.CreateGithubFileSystemProvider(
        object_store_creator)
    gcs_file_system_provider = self._delegate.CreateGCSFileSystemProvider(
        object_store_creator)
    return ServerInstance(object_store_creator,
                          CompiledFileSystem.Factory(object_store_creator),
                          branch_utility,
                          host_file_system_provider,
                          github_file_system_provider,
                          gcs_file_system_provider)