1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // A Predictor object is instantiated once in the browser process, and manages
6 // both preresolution of hostnames, as well as TCP/IP preconnection to expected
8 // Most hostname lists are provided by the renderer processes, and include URLs
9 // that *might* be used in the near future by the browsing user. One goal of
10 // this class is to cause the underlying DNS structure to lookup a hostname
11 // before it is really needed, and hence reduce latency in the standard lookup
13 // Subresource relationships are usually acquired from the referrer field in a
14 // navigation. A subresource URL may be associated with a referrer URL. Later
15 // navigations may, if the likelihood of needing the subresource is high enough,
16 // cause this module to speculatively create a TCP/IP connection. If there is
17 // only a low likelihood, then a DNS pre-resolution operation may be performed.
19 #ifndef CHROME_BROWSER_NET_PREDICTOR_H_
20 #define CHROME_BROWSER_NET_PREDICTOR_H_
28 #include "base/gtest_prod_util.h"
29 #include "base/memory/scoped_ptr.h"
30 #include "base/memory/weak_ptr.h"
31 #include "chrome/browser/net/referrer.h"
32 #include "chrome/browser/net/spdyproxy/proxy_advisor.h"
33 #include "chrome/browser/net/timed_cache.h"
34 #include "chrome/browser/net/url_info.h"
35 #include "chrome/common/net/predictor_common.h"
36 #include "net/base/host_port_pair.h"
49 class SSLConfigService;
50 class TransportSecurityState;
51 class URLRequestContextGetter;
54 namespace user_prefs {
55 class PrefRegistrySyncable;
58 namespace chrome_browser_net {
60 typedef chrome_common_net::UrlList UrlList;
61 typedef chrome_common_net::NameList NameList;
62 typedef std::map<GURL, UrlInfo> Results;
64 // An observer for testing.
65 class PredictorObserver {
67 virtual ~PredictorObserver() {}
69 virtual void OnPreconnectUrl(const GURL& original_url,
70 const GURL& first_party_for_cookies,
71 UrlInfo::ResolutionMotivation motivation,
75 // Predictor is constructed during Profile construction (on the UI thread),
76 // but it is destroyed on the IO thread when ProfileIOData goes away. All of
77 // its core state and functionality happens on the IO thread. The only UI
78 // methods are initialization / shutdown related (including preconnect
79 // initialization), or convenience methods that internally forward calls to
83 // A version number for prefs that are saved. This should be incremented when
84 // we change the format so that we discard old data.
85 static const int kPredictorReferrerVersion;
87 // Given that the underlying Chromium resolver defaults to a total maximum of
88 // 8 parallel resolutions, we will avoid any chance of starving navigational
89 // resolutions by limiting the number of parallel speculative resolutions.
90 // This is used in the field trials and testing.
91 // TODO(jar): Move this limitation into the resolver.
92 static const size_t kMaxSpeculativeParallelResolves;
94 // To control the congestion avoidance system, we need an estimate of how
95 // many speculative requests may arrive at once. Since we currently only
96 // keep 8 subresource names for each frame, we'll use that as our basis.
97 // Note that when scanning search results lists, we might actually get 10 at
98 // a time, and wikipedia can often supply (during a page scan) upwards of 50.
99 // In those odd cases, we may discard some of the later speculative requests
100 // mistakenly assuming that the resolutions took too long.
101 static const int kTypicalSpeculativeGroupSize;
103 // The next constant specifies an amount of queueing delay that is
104 // "too large," and indicative of problems with resolutions (perhaps due to
105 // an overloaded router, or such). When we exceed this delay, congestion
106 // avoidance will kick in and all speculations in the queue will be discarded.
107 static const int kMaxSpeculativeResolveQueueDelayMs;
109 // We don't bother learning to preconnect via a GET if the original URL
110 // navigation was so long ago, that a preconnection would have been dropped
111 // anyway. We believe most servers will drop the connection in 10 seconds, so
112 // we currently estimate this time-till-drop at 10 seconds.
113 // TODO(jar): We should do a persistent field trial to validate/optimize this.
114 static const int kMaxUnusedSocketLifetimeSecondsWithoutAGet;
116 // |max_concurrent| specifies how many concurrent (parallel) prefetches will
117 // be performed. Host lookups will be issued through |host_resolver|.
118 explicit Predictor(bool preconnect_enabled);
120 virtual ~Predictor();
122 // This function is used to create a predictor. For testing, we can create
123 // a version which does a simpler shutdown.
124 static Predictor* CreatePredictor(bool preconnect_enabled,
125 bool simple_shutdown);
127 static void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry);
129 // ------------- Start UI thread methods.
131 virtual void InitNetworkPredictor(PrefService* user_prefs,
132 PrefService* local_state,
134 net::URLRequestContextGetter* getter);
136 // The Omnibox has proposed a given url to the user, and if it is a search
137 // URL, then it also indicates that this is preconnectable (i.e., we could
138 // preconnect to the search server).
139 void AnticipateOmniboxUrl(const GURL& url, bool preconnectable);
141 // Preconnect a URL and all of its subresource domains.
142 void PreconnectUrlAndSubresources(const GURL& url,
143 const GURL& first_party_for_cookies);
145 static UrlList GetPredictedUrlListAtStartup(PrefService* user_prefs,
146 PrefService* local_state);
148 static void set_max_queueing_delay(int max_queueing_delay_ms);
150 static void set_max_parallel_resolves(size_t max_parallel_resolves);
152 virtual void ShutdownOnUIThread();
154 // ------------- End UI thread methods.
156 // ------------- Start IO thread methods.
158 // Cancel pending requests and prevent new ones from being made.
161 // In some circumstances, for privacy reasons, all results should be
162 // discarded. This method gracefully handles that activity.
163 // Destroy all our internal state, which shows what names we've looked up, and
164 // how long each has taken, etc. etc. We also destroy records of successes
165 // (cache hits etc.).
166 void DiscardAllResults();
168 // Add hostname(s) to the queue for processing.
169 void ResolveList(const UrlList& urls,
170 UrlInfo::ResolutionMotivation motivation);
172 void Resolve(const GURL& url, UrlInfo::ResolutionMotivation motivation);
174 // Record details of a navigation so that we can preresolve the host name
175 // ahead of time the next time the user navigates to the indicated host.
176 // Should only be called when urls are distinct, and they should already be
177 // canonicalized to not have a path.
178 void LearnFromNavigation(const GURL& referring_url, const GURL& target_url);
180 // When displaying info in about:dns, the following API is called.
181 static void PredictorGetHtmlInfo(Predictor* predictor, std::string* output);
183 // Dump HTML table containing list of referrers for about:dns.
184 void GetHtmlReferrerLists(std::string* output);
186 // Dump the list of currently known referrer domains and related prefetchable
187 // domains for about:dns.
188 void GetHtmlInfo(std::string* output);
190 // Discards any referrer for which all the suggested host names are currently
191 // annotated with negligible expected-use. Scales down (diminishes) the
192 // expected-use of those that remain, so that their use will go down by a
193 // factor each time we trim (moving the referrer closer to being discarded in
195 // The task is performed synchronously and completes before returning.
196 void TrimReferrersNow();
198 // Construct a ListValue object that contains all the data in the referrers_
199 // so that it can be persisted in a pref.
200 void SerializeReferrers(base::ListValue* referral_list);
202 // Process a ListValue that contains all the data from a previous reference
203 // list, as constructed by SerializeReferrers(), and add all the identified
204 // values into the current referrer list.
205 void DeserializeReferrers(const base::ListValue& referral_list);
207 void DeserializeReferrersThenDelete(base::ListValue* referral_list);
209 void DiscardInitialNavigationHistory();
211 void FinalizeInitializationOnIOThread(
212 const std::vector<GURL>& urls_to_prefetch,
213 base::ListValue* referral_list,
215 bool predictor_enabled);
217 // During startup, we learn what the first N urls visited are, and then
218 // resolve the associated hosts ASAP during our next startup.
219 void LearnAboutInitialNavigation(const GURL& url);
221 // Renderer bundles up list and sends to this browser API via IPC.
222 // TODO(jar): Use UrlList instead to include port and scheme.
223 void DnsPrefetchList(const NameList& hostnames);
225 // May be called from either the IO or UI thread and will PostTask
226 // to the IO thread if necessary.
227 void DnsPrefetchMotivatedList(const UrlList& urls,
228 UrlInfo::ResolutionMotivation motivation);
230 // May be called from either the IO or UI thread and will PostTask
231 // to the IO thread if necessary.
232 void SaveStateForNextStartupAndTrim(PrefService* prefs);
234 void SaveDnsPrefetchStateForNextStartupAndTrim(
235 base::ListValue* startup_list,
236 base::ListValue* referral_list,
237 base::WaitableEvent* completion);
239 // May be called from either the IO or UI thread and will PostTask
240 // to the IO thread if necessary.
241 void EnablePredictor(bool enable);
243 void EnablePredictorOnIOThread(bool enable);
245 // May be called from either the IO or UI thread and will PostTask
246 // to the IO thread if necessary.
247 void PreconnectUrl(const GURL& url, const GURL& first_party_for_cookies,
248 UrlInfo::ResolutionMotivation motivation, int count);
250 void PreconnectUrlOnIOThread(const GURL& url,
251 const GURL& first_party_for_cookies,
252 UrlInfo::ResolutionMotivation motivation,
255 void RecordPreconnectTrigger(const GURL& url);
257 void RecordPreconnectNavigationStat(const std::vector<GURL>& url_chain,
258 bool is_subresource);
260 void RecordLinkNavigation(const GURL& url);
262 // ------------- End IO thread methods.
264 // The following methods may be called on either the IO or UI threads.
266 // Instigate pre-connection to any URLs, or pre-resolution of related host,
267 // that we predict will be needed after this navigation (typically
268 // more-embedded resources on a page). This method will actually post a task
269 // to do the actual work, so as not to jump ahead of the frame navigation that
270 // instigated this activity.
271 void PredictFrameSubresources(const GURL& url,
272 const GURL& first_party_for_cookies);
274 // Put URL in canonical form, including a scheme, host, and port.
275 // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url
276 // cannot be otherwise canonicalized.
277 static GURL CanonicalizeUrl(const GURL& url);
280 void SetHostResolver(net::HostResolver* host_resolver) {
281 host_resolver_ = host_resolver;
284 void SetTransportSecurityState(
285 net::TransportSecurityState* transport_security_state) {
286 transport_security_state_ = transport_security_state;
289 void SetProxyAdvisor(ProxyAdvisor* proxy_advisor) {
290 proxy_advisor_.reset(proxy_advisor);
293 size_t max_concurrent_dns_lookups() const {
294 return max_concurrent_dns_lookups_;
297 void SetShutdown(bool shutdown) {
298 shutdown_ = shutdown;
301 void SetObserver(PredictorObserver* observer) {
302 observer_ = observer;
305 // Flag setting to use preconnection instead of just DNS pre-fetching.
306 bool preconnect_enabled() const {
307 return preconnect_enabled_;
310 // Flag setting for whether we are prefetching dns lookups.
311 bool predictor_enabled() const {
312 return predictor_enabled_;
317 FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest);
318 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest);
319 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest);
320 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest);
321 FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest);
322 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest);
323 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest);
324 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ReferrerSerializationTrimTest);
325 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTestWithDisabledAdvisor);
326 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTestWithEnabledAdvisor);
327 FRIEND_TEST_ALL_PREFIXES(PredictorTest, TestSimplePreconnectAdvisor);
328 friend class WaitForResolutionHelper; // For testing.
332 // A simple priority queue for handling host names.
333 // Some names that are queued up have |motivation| that requires very rapid
334 // handling. For example, a sub-resource name lookup MUST be done before the
335 // actual sub-resource is fetched. In contrast, a name that was speculatively
336 // noted in a page has to be resolved before the user "gets around to"
337 // clicking on a link. By tagging (with a motivation) each push we make into
338 // this FIFO queue, the queue can re-order the more important names to service
339 // them sooner (relative to some low priority background resolutions).
340 class HostNameQueue {
344 void Push(const GURL& url,
345 UrlInfo::ResolutionMotivation motivation);
346 bool IsEmpty() const;
350 // The names in the queue that should be serviced (popped) ASAP.
351 std::queue<GURL> rush_queue_;
352 // The names in the queue that should only be serviced when rush_queue is
354 std::queue<GURL> background_queue_;
356 DISALLOW_COPY_AND_ASSIGN(HostNameQueue);
359 // The InitialObserver monitors navigations made by the network stack. This
360 // is only used to identify startup time resolutions (for re-resolution
361 // during our next process startup).
362 // TODO(jar): Consider preconnecting at startup, which may be faster than
363 // waiting for render process to start and request a connection.
364 class InitialObserver {
368 // Recording of when we observed each navigation.
369 typedef std::map<GURL, base::TimeTicks> FirstNavigations;
371 // Potentially add a new URL to our startup list.
372 void Append(const GURL& url, Predictor* predictor);
374 // Get an HTML version of our current planned first_navigations_.
375 void GetFirstResolutionsHtml(std::string* output);
377 // Persist the current first_navigations_ for storage in a list.
378 void GetInitialDnsResolutionList(base::ListValue* startup_list);
380 // Discards all initial loading history by clearing first_navigations_.
381 void DiscardInitialNavigationHistory() { first_navigations_.clear(); }
384 // List of the first N URL resolutions observed in this run.
385 FirstNavigations first_navigations_;
387 // The number of URLs we'll save for pre-resolving at next startup.
388 static const size_t kStartupResolutionCount = 10;
391 // A map that is keyed with the host/port that we've learned were the cause
392 // of loading additional URLs. The list of additional targets is held
393 // in a Referrer instance, which is a value in this map.
394 typedef std::map<GURL, Referrer> Referrers;
396 // Depending on the expected_subresource_use_, we may either make a TCP/IP
397 // preconnection, or merely pre-resolve the hostname via DNS (or even do
398 // nothing). The following are the thresholds for taking those actions.
399 static const double kPreconnectWorthyExpectedValue;
400 static const double kDNSPreresolutionWorthyExpectedValue;
401 // Referred hosts with a subresource_use_rate_ that are less than the
402 // following threshold will be discarded when we Trim() the list.
403 static const double kDiscardableExpectedValue;
404 // During trimming operation to discard hosts for which we don't have likely
405 // subresources, we multiply the expected_subresource_use_ value by the
406 // following ratio until that value is less than kDiscardableExpectedValue.
407 // This number should always be less than 1, and more than 0.
408 static const double kReferrerTrimRatio;
410 // Interval between periodic trimming of our whole referrer list.
411 // We only do a major trimming about once an hour, and then only when the user
412 // is actively browsing.
413 static const int64 kDurationBetweenTrimmingsHours;
414 // Interval between incremental trimmings (to avoid inducing Jank).
415 static const int64 kDurationBetweenTrimmingIncrementsSeconds;
416 // Number of referring URLs processed in an incremental trimming.
417 static const size_t kUrlsTrimmedPerIncrement;
419 // Only for testing. Returns true if hostname has been successfully resolved
421 bool WasFound(const GURL& url) const {
422 Results::const_iterator it(results_.find(url));
423 return (it != results_.end()) &&
424 it->second.was_found();
427 // Only for testing. Returns how long the resolution took,
428 // or UrlInfo::NullDuration() if it hasn't been resolved yet.
429 base::TimeDelta GetResolutionDuration(const GURL& url) {
430 if (results_.find(url) == results_.end())
431 return UrlInfo::NullDuration();
432 return results_[url].resolve_duration();
// Only for testing. Returns the current value of peak_pending_lookups_.
436 size_t peak_pending_lookups() const { return peak_pending_lookups_; }
438 // If a proxy advisor is defined, let it know that |url| will be prefetched or
439 // preconnected to. Can be called on either UI or IO threads and will post to
440 // the IO thread if necessary, invoking AdviseProxyOnIOThread().
441 void AdviseProxy(const GURL& url,
442 UrlInfo::ResolutionMotivation motivation,
445 // ------------- Start IO thread methods.
447 // Perform actual resolution or preconnection to subresources now. This is
448 // an internal worker method that is reached via a post task from
449 // PredictFrameSubresources().
450 void PrepareFrameSubresources(const GURL& url,
451 const GURL& first_party_for_cookies);
453 // Access method for use by async lookup request to pass resolution result.
454 void OnLookupFinished(LookupRequest* request, const GURL& url, bool found);
456 // Underlying method for both async and synchronous lookup to update state.
457 void LookupFinished(LookupRequest* request,
458 const GURL& url, bool found);
460 // Queue hostname for resolution. If queueing was done, return the pointer
461 // to the queued instance, otherwise return NULL. If the proxy advisor is
462 // enabled, and |url| is likely to be proxied, the hostname will not be
463 // queued as the browser is not expected to fetch it directly.
464 UrlInfo* AppendToResolutionQueue(const GURL& url,
465 UrlInfo::ResolutionMotivation motivation);
467 // Check to see if too much queuing delay has been noted for the given info,
468 // which indicates that there is "congestion" or growing delay in handling the
469 // resolution of names. Rather than letting this congestion potentially grow
470 // without bounds, we abandon our queued efforts at pre-resolutions in such a
472 // To do this, we will recycle |info|, as well as all queued items, back to
473 // the state they had before they were queued up. We can't do anything about
474 // the resolutions we've already sent off for processing on another thread, so
475 // we just let them complete. On a slow system, subject to congestion, this
476 // will greatly reduce the number of resolutions done, but it will assure that
477 // any resolutions that are done, are in a timely and hence potentially
479 bool CongestionControlPerformed(UrlInfo* info);
481 // Take lookup requests from work_queue_ and tell HostResolver to look them up
482 // asynchronously, provided we don't exceed concurrent resolution limit.
483 void StartSomeQueuedResolutions();
485 // Performs trimming similar to TrimReferrersNow(), except it does it as a
486 // series of short tasks by posting continuations again and again until done.
487 void TrimReferrers();
489 // Loads urls_being_trimmed_ from keys of current referrers_.
490 void LoadUrlsForTrimming();
492 // Posts a task to do additional incremental trimming of referrers_.
493 void PostIncrementalTrimTask();
495 // Calls Trim() on some or all of urls_being_trimmed_.
496 // If it does not process all the URLs in that vector, it posts a task to
497 // continue with them shortly (i.e., it yields and continues).
498 void IncrementalTrimReferrers(bool trim_all_now);
500 // If a proxy advisor is defined, let it know that |url| will be prefetched or
502 void AdviseProxyOnIOThread(const GURL& url,
503 UrlInfo::ResolutionMotivation motivation,
506 // Applies the HSTS redirect for |url|, if any.
507 GURL GetHSTSRedirectOnIOThread(const GURL& url);
509 // ------------- End IO thread methods.
511 scoped_ptr<InitialObserver> initial_observer_;
513 // Reference to URLRequestContextGetter from the Profile which owns the
514 // predictor. Used by Preconnect.
515 scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
517 // Status of speculative DNS resolution and speculative TCP/IP connection
519 bool predictor_enabled_;
521 // work_queue_ holds a list of names we need to look up.
522 HostNameQueue work_queue_;
524 // results_ contains information for existing/prior prefetches.
527 std::set<LookupRequest*> pending_lookups_;
529 // For testing, to verify that we don't exceed the limit.
530 size_t peak_pending_lookups_;
532 // When true, we don't make new lookup requests.
535 // The number of concurrent speculative lookups currently allowed to be sent
536 // to the resolver. Any additional lookups will be queued to avoid exceeding
537 // this value. The queue is a priority queue that will accelerate
538 // sub-resource speculation, and retard resolutions suggested by page scans.
539 const size_t max_concurrent_dns_lookups_;
541 // The maximum queueing delay that is acceptable before we enter congestion
542 // reduction mode, and discard all queued (but not yet assigned) resolutions.
543 const base::TimeDelta max_dns_queue_delay_;
545 // The host resolver we warm DNS entries for.
546 net::HostResolver* host_resolver_;
548 // The TransportSecurityState instance we query HSTS redirects from.
549 net::TransportSecurityState* transport_security_state_;
551 // The SSLConfigService we query SNI support from (used in querying HSTS
553 net::SSLConfigService* ssl_config_service_;
555 // Are we currently using preconnection, rather than just DNS resolution, for
556 // subresources and omni-box search URLs.
557 bool preconnect_enabled_;
559 // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl().
560 std::string last_omnibox_host_;
562 // The time when the last preresolve was done for last_omnibox_host_.
563 base::TimeTicks last_omnibox_preresolve_;
565 // The number of consecutive requests to AnticipateOmniboxUrl() that suggested
566 // preconnecting (because it was to a search service).
567 int consecutive_omnibox_preconnect_count_;
569 // The time when the last preconnection was requested to a search service.
570 base::TimeTicks last_omnibox_preconnect_;
572 class PreconnectUsage;
573 scoped_ptr<PreconnectUsage> preconnect_usage_;
575 // For each URL that we might navigate to (that we've "learned about")
576 // we have a Referrer list. Each Referrer list has all hostnames we might
577 // need to pre-resolve or pre-connect to when there is a navigation to the
578 // original hostname.
579 Referrers referrers_;
581 // List of URLs in referrers_ currently being trimmed (scaled down to
582 // eventually be aged out of use).
583 std::vector<GURL> urls_being_trimmed_;
585 // A time after which we need to do more trimming of referrers.
586 base::TimeTicks next_trim_time_;
588 scoped_ptr<base::WeakPtrFactory<Predictor> > weak_factory_;
590 scoped_ptr<ProxyAdvisor> proxy_advisor_;
592 // An observer for testing.
593 PredictorObserver* observer_;
595 DISALLOW_COPY_AND_ASSIGN(Predictor);
598 // This version of the predictor is used for testing.
599 class SimplePredictor : public Predictor {
601 explicit SimplePredictor(bool preconnect_enabled)
602 : Predictor(preconnect_enabled) {}
603 virtual ~SimplePredictor() {}
604 virtual void InitNetworkPredictor(
605 PrefService* user_prefs,
606 PrefService* local_state,
608 net::URLRequestContextGetter* getter) OVERRIDE;
609 virtual void ShutdownOnUIThread() OVERRIDE;
612 } // namespace chrome_browser_net
614 #endif // CHROME_BROWSER_NET_PREDICTOR_H_