1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // A Predictor object is instantiated once in the browser process, and manages
6 // both preresolution of hostnames, as well as TCP/IP preconnection to expected
8 // Most hostname lists are provided by the renderer processes, and include URLs
9 // that *might* be used in the near future by the browsing user. One goal of
10 // this class is to cause the underlying DNS structure to lookup a hostname
11 // before it is really needed, and hence reduce latency in the standard lookup
13 // Subresource relationships are usually acquired from the referrer field in a
14 // navigation. A subresource URL may be associated with a referrer URL. Later
15 // navigations may, if the likelihood of needing the subresource is high enough,
16 // cause this module to speculatively create a TCP/IP connection. If there is
17 // only a low likelihood, then a DNS pre-resolution operation may be performed.
19 #ifndef CHROME_BROWSER_NET_PREDICTOR_H_
20 #define CHROME_BROWSER_NET_PREDICTOR_H_
28 #include "base/gtest_prod_util.h"
29 #include "base/memory/scoped_ptr.h"
30 #include "base/memory/weak_ptr.h"
31 #include "chrome/browser/net/referrer.h"
32 #include "chrome/browser/net/timed_cache.h"
33 #include "chrome/browser/net/url_info.h"
34 #include "chrome/common/net/predictor_common.h"
35 #include "net/base/host_port_pair.h"
48 class URLRequestContextGetter;
51 namespace user_prefs {
52 class PrefRegistrySyncable;
55 namespace chrome_browser_net {
57 typedef chrome_common_net::UrlList UrlList;
58 typedef chrome_common_net::NameList NameList;
59 typedef std::map<GURL, UrlInfo> Results;
61 // Predictor is constructed during Profile construction (on the UI thread),
62 // but it is destroyed on the IO thread when ProfileIOData goes away. All of
63 // its core state and functionality happens on the IO thread. The only UI
64 // methods are initialization / shutdown related (including preconnect
65 // initialization), or convenience methods that internally forward calls to
69 // A version number for prefs that are saved. This should be incremented when
70 // we change the format so that we discard old data.
71 static const int kPredictorReferrerVersion;
73 // Given that the underlying Chromium resolver defaults to a total maximum of
74 // 8 parallel resolutions, we will avoid any chance of starving navigational
75 // resolutions by limiting the number of parallel speculative resolutions.
76 // This is used in the field trials and testing.
77 // TODO(jar): Move this limitation into the resolver.
78 static const size_t kMaxSpeculativeParallelResolves;
80 // To control the congestion avoidance system, we need an estimate of how
81 // many speculative requests may arrive at once. Since we currently only
82 // keep 8 subresource names for each frame, we'll use that as our basis.
83 // Note that when scanning search results lists, we might actually get 10 at
84 // a time, and wikipedia can often supply (during a page scan) upwards of 50.
85 // In those odd cases, we may discard some of the later speculative requests
86 // mistakenly assuming that the resolutions took too long.
87 static const int kTypicalSpeculativeGroupSize;
89 // The next constant specifies an amount of queueing delay that is
90 // "too large," and indicative of problems with resolutions (perhaps due to
91 // an overloaded router, or such). When we exceed this delay, congestion
92 // avoidance will kick in and all speculations in the queue will be discarded.
93 static const int kMaxSpeculativeResolveQueueDelayMs;
95 // We don't bother learning to preconnect via a GET if the original URL
96 // navigation was so long ago, that a preconnection would have been dropped
97 // anyway. We believe most servers will drop the connection in 10 seconds, so
98 // we currently estimate this time-till-drop at 10 seconds.
99 // TODO(jar): We should do a persistent field trial to validate/optimize this.
100 static const int kMaxUnusedSocketLifetimeSecondsWithoutAGet;
102 // |preconnect_enabled| presumably initializes the flag returned by
103 // preconnect_enabled(): use preconnection instead of just DNS pre-fetching.
// NOTE(review): the previous comment here described |max_concurrent| and
// |host_resolver|, neither of which is a parameter of this constructor. A
// host resolver can be supplied through SetHostResolver().
104 explicit Predictor(bool preconnect_enabled);
106 virtual ~Predictor();
108 // This function is used to create a predictor. For testing, we can create
109 // a version which does a simpler shutdown.
110 static Predictor* CreatePredictor(bool preconnect_enabled,
111 bool simple_shutdown);
113 static void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry);
115 // ------------- Start UI thread methods.
117 virtual void InitNetworkPredictor(PrefService* user_prefs,
118 PrefService* local_state,
120 net::URLRequestContextGetter* getter);
122 // The Omnibox has proposed a given url to the user, and if it is a search
123 // URL, then it also indicates that this is preconnectable (i.e., we could
124 // preconnect to the search server).
125 void AnticipateOmniboxUrl(const GURL& url, bool preconnectable);
127 // Preconnect a URL and all of its subresource domains.
128 void PreconnectUrlAndSubresources(const GURL& url,
129 const GURL& first_party_for_cookies);
131 static UrlList GetPredictedUrlListAtStartup(PrefService* user_prefs,
132 PrefService* local_state);
134 static void set_max_queueing_delay(int max_queueing_delay_ms);
136 static void set_max_parallel_resolves(size_t max_parallel_resolves);
138 virtual void ShutdownOnUIThread();
140 // ------------- End UI thread methods.
142 // ------------- Start IO thread methods.
144 // Cancel pending requests and prevent new ones from being made.
147 // In some circumstances, for privacy reasons, all results should be
148 // discarded. This method gracefully handles that activity.
149 // Destroy all our internal state, which shows what names we've looked up, and
150 // how long each has taken, etc. etc. We also destroy records of successes
151 // (cache hits etc.).
152 void DiscardAllResults();
154 // Add hostname(s) to the queue for processing.
155 void ResolveList(const UrlList& urls,
156 UrlInfo::ResolutionMotivation motivation);
158 void Resolve(const GURL& url, UrlInfo::ResolutionMotivation motivation);
160 // Record details of a navigation so that we can preresolve the host name
161 // ahead of time the next time the user navigates to the indicated host.
162 // Should only be called when urls are distinct, and they should already be
163 // canonicalized to not have a path.
164 void LearnFromNavigation(const GURL& referring_url, const GURL& target_url);
166 // When displaying info in about:dns, the following API is called.
167 static void PredictorGetHtmlInfo(Predictor* predictor, std::string* output);
169 // Dump HTML table containing list of referrers for about:dns.
170 void GetHtmlReferrerLists(std::string* output);
172 // Dump the list of currently known referrer domains and related prefetchable
173 // domains for about:dns.
174 void GetHtmlInfo(std::string* output);
176 // Discards any referrer for which all the suggested host names are currently
177 // annotated with negligible expected-use. Scales down (diminishes) the
178 // expected-use of those that remain, so that their use will go down by a
179 // factor each time we trim (moving the referrer closer to being discarded in
181 // The task is performed synchronously and completes before returning.
182 void TrimReferrersNow();
184 // Construct a ListValue object that contains all the data in the referrers_
185 // so that it can be persisted in a pref.
186 void SerializeReferrers(base::ListValue* referral_list);
188 // Process a ListValue that contains all the data from a previous reference
189 // list, as constructed by SerializeReferrers(), and add all the identified
190 // values into the current referrer list.
191 void DeserializeReferrers(const base::ListValue& referral_list);
// NOTE(review): undocumented. Judging by the name, this presumably feeds
// |referral_list| to DeserializeReferrers() and then deletes the list (i.e.
// takes ownership of the pointer) -- confirm against the implementation.
193 void DeserializeReferrersThenDelete(base::ListValue* referral_list);
195 void DiscardInitialNavigationHistory();
197 void FinalizeInitializationOnIOThread(
198 const std::vector<GURL>& urls_to_prefetch,
199 base::ListValue* referral_list,
201 bool predictor_enabled);
203 // During startup, we learn what the first N urls visited are, and then
204 // resolve the associated hosts ASAP during our next startup.
205 void LearnAboutInitialNavigation(const GURL& url);
207 // Renderer bundles up list and sends to this browser API via IPC.
208 // TODO(jar): Use UrlList instead to include port and scheme.
209 void DnsPrefetchList(const NameList& hostnames);
211 // May be called from either the IO or UI thread and will PostTask
212 // to the IO thread if necessary.
213 void DnsPrefetchMotivatedList(const UrlList& urls,
214 UrlInfo::ResolutionMotivation motivation);
216 // May be called from either the IO or UI thread and will PostTask
217 // to the IO thread if necessary.
218 void SaveStateForNextStartupAndTrim(PrefService* prefs);
220 void SaveDnsPrefetchStateForNextStartupAndTrim(
221 base::ListValue* startup_list,
222 base::ListValue* referral_list,
223 base::WaitableEvent* completion);
225 // May be called from either the IO or UI thread and will PostTask
226 // to the IO thread if necessary.
227 void EnablePredictor(bool enable);
229 void EnablePredictorOnIOThread(bool enable);
231 // May be called from either the IO or UI thread and will PostTask
232 // to the IO thread if necessary.
233 void PreconnectUrl(const GURL& url, const GURL& first_party_for_cookies,
234 UrlInfo::ResolutionMotivation motivation, int count);
236 void PreconnectUrlOnIOThread(const GURL& url,
237 const GURL& first_party_for_cookies,
238 UrlInfo::ResolutionMotivation motivation,
241 void RecordPreconnectTrigger(const GURL& url);
243 void RecordPreconnectNavigationStat(const std::vector<GURL>& url_chain,
244 bool is_subresource);
246 void RecordLinkNavigation(const GURL& url);
248 // ------------- End IO thread methods.
250 // The following methods may be called on either the IO or UI threads.
252 // Instigate pre-connection to any URLs, or pre-resolution of related host,
253 // that we predict will be needed after this navigation (typically
254 // more-embedded resources on a page). This method will actually post a task
255 // to do the actual work, so as not to jump ahead of the frame navigation that
256 // instigated this activity.
257 void PredictFrameSubresources(const GURL& url,
258 const GURL& first_party_for_cookies);
260 // Put URL in canonical form, including a scheme, host, and port.
261 // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url
262 // cannot be otherwise canonicalized.
263 static GURL CanonicalizeUrl(const GURL& url);
// Stores |host_resolver| in host_resolver_, the resolver we warm DNS entries
// for. It is kept as a raw pointer.
// NOTE(review): ownership/lifetime of the resolver is not visible here.
266 void SetHostResolver(net::HostResolver* host_resolver) {
267 host_resolver_ = host_resolver;
// Returns max_concurrent_dns_lookups_, the number of concurrent speculative
// lookups currently allowed to be sent to the resolver.
270 size_t max_concurrent_dns_lookups() const {
271 return max_concurrent_dns_lookups_;
// Sets shutdown_ to |shutdown|.
// NOTE(review): shutdown_ is presumably the "when true, we don't make new
// lookup requests" flag -- confirm against the member declaration.
274 void SetShutdown(bool shutdown) {
275 shutdown_ = shutdown;
278 // Flag setting to use preconnection instead of just DNS pre-fetching.
279 bool preconnect_enabled() const {
280 return preconnect_enabled_;
283 // Flag setting for whether we are prefetching dns lookups.
284 bool predictor_enabled() const {
285 return predictor_enabled_;
290 FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest);
291 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest);
292 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest);
293 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest);
294 FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest);
295 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest);
296 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest);
297 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ReferrerSerializationTrimTest);
298 friend class WaitForResolutionHelper; // For testing.
302 // A simple priority queue for handling host names.
303 // Some names that are queued up have |motivation| that requires very rapid
304 // handling. For example, a sub-resource name lookup MUST be done before the
305 // actual sub-resource is fetched. In contrast, a name that was speculatively
306 // noted in a page has to be resolved before the user "gets around to"
307 // clicking on a link. By tagging (with a motivation) each push we make into
308 // this FIFO queue, the queue can re-order the more important names to service
309 // them sooner (relative to some low priority background resolutions).
310 class HostNameQueue {
314 void Push(const GURL& url,
315 UrlInfo::ResolutionMotivation motivation);
316 bool IsEmpty() const;
320 // The names in the queue that should be serviced (popped) ASAP.
321 std::queue<GURL> rush_queue_;
322 // The names in the queue that should only be serviced when rush_queue is
324 std::queue<GURL> background_queue_;
326 DISALLOW_COPY_AND_ASSIGN(HostNameQueue);
329 // The InitialObserver monitors navigations made by the network stack. This
330 // is only used to identify startup time resolutions (for re-resolution
331 // during our next process startup).
332 // TODO(jar): Consider preconnecting at startup, which may be faster than
333 // waiting for render process to start and request a connection.
334 class InitialObserver {
338 // Recording of when we observed each navigation.
339 typedef std::map<GURL, base::TimeTicks> FirstNavigations;
341 // Potentially add a new URL to our startup list.
342 void Append(const GURL& url, Predictor* predictor);
344 // Get an HTML version of our current planned first_navigations_.
345 void GetFirstResolutionsHtml(std::string* output);
347 // Persist the current first_navigations_ for storage in a list.
348 void GetInitialDnsResolutionList(base::ListValue* startup_list);
350 // Discards all initial loading history by clearing first_navigations_.
351 void DiscardInitialNavigationHistory() { first_navigations_.clear(); }
354 // List of the first N URL resolutions observed in this run.
355 FirstNavigations first_navigations_;
357 // The number of URLs we'll save for pre-resolving at next startup.
358 static const size_t kStartupResolutionCount = 10;
361 // A map that is keyed with the host/port that we've learned were the cause
362 // of loading additional URLs. The list of additional targets is held
363 // in a Referrer instance, which is a value in this map.
364 typedef std::map<GURL, Referrer> Referrers;
366 // Depending on the expected_subresource_use_, we may either make a TCP/IP
367 // preconnection, or merely pre-resolve the hostname via DNS (or even do
368 // nothing). The following are the thresholds for taking those actions.
369 static const double kPreconnectWorthyExpectedValue;
370 static const double kDNSPreresolutionWorthyExpectedValue;
371 // Referred hosts with a subresource_use_rate_ that are less than the
372 // following threshold will be discarded when we Trim() the list.
373 static const double kDiscardableExpectedValue;
374 // During trimming operation to discard hosts for which we don't have likely
375 // subresources, we multiply the expected_subresource_use_ value by the
376 // following ratio until that value is less than kDiscardableExpectedValue.
377 // This number should always be less than 1, and more than 0.
378 static const double kReferrerTrimRatio;
380 // Interval between periodic trimming of our whole referrer list.
381 // We only do a major trimming about once an hour, and then only when the user
382 // is actively browsing.
383 static const int64 kDurationBetweenTrimmingsHours;
384 // Interval between incremental trimmings (to avoid inducing Jank).
385 static const int64 kDurationBetweenTrimmingIncrementsSeconds;
386 // Number of referring URLs processed in an incremental trimming.
387 static const size_t kUrlsTrimmedPerIncrement;
// Looks |url| up in results_; a URL with no entry there yields false,
// otherwise the entry's was_found() value is returned.
389 // Only for testing. Returns true if hostname has been successfully resolved
391 bool WasFound(const GURL& url) const {
392 Results::const_iterator it(results_.find(url));
393 return (it != results_.end()) &&
394 it->second.was_found();
397 // Only for testing. Returns how long the resolution of |url| took,
398 // or UrlInfo::NullDuration() if |url| has no entry in results_.
// NOTE(review): this performs two lookups (find(), then operator[]). Reusing
// the iterator from a single find(), as WasFound() does, would avoid the
// second lookup and might also allow the method to be declared const.
399 base::TimeDelta GetResolutionDuration(const GURL& url) {
400 if (results_.find(url) == results_.end())
401 return UrlInfo::NullDuration();
402 return results_[url].resolve_duration();
// For testing: returns peak_pending_lookups_, which is kept to verify that we
// don't exceed the lookup limit.
406 size_t peak_pending_lookups() const { return peak_pending_lookups_; }
408 // ------------- Start IO thread methods.
410 // Perform actual resolution or preconnection to subresources now. This is
411 // an internal worker method that is reached via a post task from
412 // PredictFrameSubresources().
413 void PrepareFrameSubresources(const GURL& url,
414 const GURL& first_party_for_cookies);
416 // Access method for use by async lookup request to pass resolution result.
417 void OnLookupFinished(LookupRequest* request, const GURL& url, bool found);
419 // Underlying method for both async and synchronous lookup to update state.
420 void LookupFinished(LookupRequest* request,
421 const GURL& url, bool found);
423 // Queue hostname for resolution. If queueing was done, return the pointer
424 // to the queued instance, otherwise return NULL.
425 UrlInfo* AppendToResolutionQueue(const GURL& url,
426 UrlInfo::ResolutionMotivation motivation);
428 // Check to see if too much queuing delay has been noted for the given info,
429 // which indicates that there is "congestion" or growing delay in handling the
430 // resolution of names. Rather than letting this congestion potentially grow
431 // without bounds, we abandon our queued efforts at pre-resolutions in such a
433 // To do this, we will recycle |info|, as well as all queued items, back to
434 // the state they had before they were queued up. We can't do anything about
435 // the resolutions we've already sent off for processing on another thread, so
436 // we just let them complete. On a slow system, subject to congestion, this
437 // will greatly reduce the number of resolutions done, but it will assure that
438 // any resolutions that are done, are in a timely and hence potentially
440 bool CongestionControlPerformed(UrlInfo* info);
442 // Take lookup requests from work_queue_ and tell HostResolver to look them up
443 // asynchronously, provided we don't exceed concurrent resolution limit.
444 void StartSomeQueuedResolutions();
446 // Performs trimming similar to TrimReferrersNow(), except it does it as a
447 // series of short tasks by posting continuations again and again until done.
448 void TrimReferrers();
450 // Loads urls_being_trimmed_ from keys of current referrers_.
451 void LoadUrlsForTrimming();
453 // Posts a task to do additional incremental trimming of referrers_.
454 void PostIncrementalTrimTask();
456 // Calls Trim() on some or all of urls_being_trimmed_.
457 // If it does not process all the URLs in that vector, it posts a task to
458 // continue with them shortly (i.e., it yields and continues).
459 void IncrementalTrimReferrers(bool trim_all_now);
461 // ------------- End IO thread methods.
463 scoped_ptr<InitialObserver> initial_observer_;
465 // Reference to URLRequestContextGetter from the Profile which owns the
466 // predictor. Used by Preconnect.
467 scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
469 // Status of speculative DNS resolution and speculative TCP/IP connection
471 bool predictor_enabled_;
473 // work_queue_ holds a list of names we need to look up.
474 HostNameQueue work_queue_;
476 // results_ contains information for existing/prior prefetches.
479 std::set<LookupRequest*> pending_lookups_;
481 // For testing, to verify that we don't exceed the limit.
482 size_t peak_pending_lookups_;
484 // When true, we don't make new lookup requests.
487 // The number of concurrent speculative lookups currently allowed to be sent
488 // to the resolver. Any additional lookups will be queued to avoid exceeding
489 // this value. The queue is a priority queue that will accelerate
490 // sub-resource speculation, and retard resolutions suggested by page scans.
491 const size_t max_concurrent_dns_lookups_;
493 // The maximum queueing delay that is acceptable before we enter congestion
494 // reduction mode, and discard all queued (but not yet assigned) resolutions.
495 const base::TimeDelta max_dns_queue_delay_;
497 // The host resolver we warm DNS entries for.
498 net::HostResolver* host_resolver_;
500 // Are we currently using preconnection, rather than just DNS resolution, for
501 // subresources and omni-box search URLs.
502 bool preconnect_enabled_;
504 // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl().
505 std::string last_omnibox_host_;
507 // The time when the last preresolve was done for last_omnibox_host_.
508 base::TimeTicks last_omnibox_preresolve_;
510 // The number of consecutive requests to AnticipateOmniboxUrl() that suggested
511 // preconnecting (because it was to a search service).
512 int consecutive_omnibox_preconnect_count_;
514 // The time when the last preconnection was requested to a search service.
515 base::TimeTicks last_omnibox_preconnect_;
517 class PreconnectUsage;
518 scoped_ptr<PreconnectUsage> preconnect_usage_;
520 // For each URL that we might navigate to (that we've "learned about")
521 // we have a Referrer list. Each Referrer list has all hostnames we might
522 // need to pre-resolve or pre-connect to when there is a navigation to the
523 // original hostname.
524 Referrers referrers_;
526 // List of URLs in referrers_ currently being trimmed (scaled down to
527 // eventually be aged out of use).
528 std::vector<GURL> urls_being_trimmed_;
530 // A time after which we need to do more trimming of referrers.
531 base::TimeTicks next_trim_time_;
533 scoped_ptr<base::WeakPtrFactory<Predictor> > weak_factory_;
535 DISALLOW_COPY_AND_ASSIGN(Predictor);
538 // This version of the predictor is used for testing.
// It overrides Predictor's two virtual UI-thread methods,
// InitNetworkPredictor() and ShutdownOnUIThread(); everything else is
// inherited unchanged.
// NOTE(review): presumably this is the "simpler shutdown" variant that
// Predictor::CreatePredictor() can create for tests -- confirm in the .cc.
539 class SimplePredictor : public Predictor {
// Forwards |preconnect_enabled| to the Predictor constructor.
541 explicit SimplePredictor(bool preconnect_enabled)
542 : Predictor(preconnect_enabled) {}
543 virtual ~SimplePredictor() {}
544 virtual void InitNetworkPredictor(
545 PrefService* user_prefs,
546 PrefService* local_state,
548 net::URLRequestContextGetter* getter) OVERRIDE;
549 virtual void ShutdownOnUIThread() OVERRIDE;
552 } // namespace chrome_browser_net
554 #endif // CHROME_BROWSER_NET_PREDICTOR_H_