1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/net/predictor.h"
12 #include "base/basictypes.h"
13 #include "base/bind.h"
14 #include "base/compiler_specific.h"
15 #include "base/containers/mru_cache.h"
16 #include "base/logging.h"
17 #include "base/metrics/histogram.h"
18 #include "base/prefs/pref_service.h"
19 #include "base/prefs/scoped_user_pref_update.h"
20 #include "base/stl_util.h"
21 #include "base/strings/string_split.h"
22 #include "base/strings/string_util.h"
23 #include "base/strings/stringprintf.h"
24 #include "base/synchronization/waitable_event.h"
25 #include "base/threading/thread_restrictions.h"
26 #include "base/time/time.h"
27 #include "base/values.h"
28 #include "chrome/browser/io_thread.h"
29 #include "chrome/browser/net/preconnect.h"
30 #include "chrome/browser/net/spdyproxy/proxy_advisor.h"
31 #include "chrome/browser/prefs/session_startup_pref.h"
32 #include "chrome/browser/profiles/profile_io_data.h"
33 #include "chrome/common/chrome_switches.h"
34 #include "chrome/common/pref_names.h"
35 #include "components/data_reduction_proxy/browser/data_reduction_proxy_settings.h"
36 #include "components/pref_registry/pref_registry_syncable.h"
37 #include "content/public/browser/browser_thread.h"
38 #include "net/base/address_list.h"
39 #include "net/base/completion_callback.h"
40 #include "net/base/host_port_pair.h"
41 #include "net/base/net_errors.h"
42 #include "net/base/net_log.h"
43 #include "net/dns/host_resolver.h"
44 #include "net/dns/single_request_host_resolver.h"
45 #include "net/http/transport_security_state.h"
46 #include "net/ssl/ssl_config_service.h"
47 #include "net/url_request/url_request_context.h"
48 #include "net/url_request/url_request_context_getter.h"
50 using base::TimeDelta;
51 using content::BrowserThread;
53 namespace chrome_browser_net {
// Definitions of the Predictor tuning constants, followed by the file-level
// defaults that are derived from them.
56 const int Predictor::kPredictorReferrerVersion = 2;
57 const double Predictor::kPreconnectWorthyExpectedValue = 0.8;
58 const double Predictor::kDNSPreresolutionWorthyExpectedValue = 0.1;
59 const double Predictor::kDiscardableExpectedValue = 0.05;
60 // The goal of trimming is to reduce the importance (number of expected
61 // subresources needed) by a factor of 2 after about 24 hours of uptime. We will
62 // trim roughly once-an-hour of uptime. The ratio to use in each trim operation
63 // is then the 24th root of 0.5. If a user only surfs for 4 hours a day, then
64 // after about 6 days they will have halved all their estimates of subresource
65 // connections. Once this falls below kDiscardableExpectedValue the referrer
// data is presumably discarded -- TODO confirm against the trimming code.
67 // TODO(jar): Measure size of referrer lists in the field. Consider an adaptive
68 // system that uses a higher trim ratio when the list is large.
// 0.97153 ~= 0.5^(1/24), i.e. the per-trim ratio described above.
70 const double Predictor::kReferrerTrimRatio = 0.97153;
71 const int64 Predictor::kDurationBetweenTrimmingsHours = 1;
72 const int64 Predictor::kDurationBetweenTrimmingIncrementsSeconds = 15;
73 const size_t Predictor::kUrlsTrimmedPerIncrement = 5u;
74 const size_t Predictor::kMaxSpeculativeParallelResolves = 3;
75 const int Predictor::kMaxUnusedSocketLifetimeSecondsWithoutAGet = 10;
76 // To control our congestion avoidance system, which discards a queue when
77 // resolutions are "taking too long," we need an expected resolution time.
78 // Common average is in the range of 300-500ms.
79 const int kExpectedResolutionTimeMs = 500;
80 const int Predictor::kTypicalSpeculativeGroupSize = 8;
// Integral division: (500 * 8) / 3 == 1333 (milliseconds, per the name).
81 const int Predictor::kMaxSpeculativeResolveQueueDelayMs =
82 (kExpectedResolutionTimeMs * Predictor::kTypicalSpeculativeGroupSize) /
83 Predictor::kMaxSpeculativeParallelResolves;
// Mutable file-level copies of the two limits above. The Predictor
// constructor reads them, and set_max_queueing_delay() /
// set_max_parallel_resolves() overwrite them.
85 static int g_max_queueing_delay_ms =
86 Predictor::kMaxSpeculativeResolveQueueDelayMs;
87 static size_t g_max_parallel_resolves =
88 Predictor::kMaxSpeculativeParallelResolves;
90 // A version number for prefs that are saved. This should be incremented when
91 // we change the format so that we discard old data.
92 static const int kPredictorStartupFormatVersion = 1;
// Helper that performs one speculative host resolution on behalf of a
// Predictor, using a net::SingleRequestHostResolver, and reports the outcome
// back through Predictor::OnLookupFinished().
// NOTE(review): several lines of this class (access specifiers, one
// constructor parameter/initializer, the signature of the resolve method and
// some Resolve() arguments) are not visible in this view.
94 class Predictor::LookupRequest {
96 LookupRequest(Predictor* predictor,
97 net::HostResolver* host_resolver,
99 : predictor_(predictor),
101 resolver_(host_resolver) {
104 // Return underlying network resolver status.
105 // net::OK ==> Host was found synchronously.
106 // net::ERR_IO_PENDING ==> Network will callback later with result.
107 // anything else ==> Host was not found synchronously.
109 net::HostResolver::RequestInfo resolve_info(
110 net::HostPortPair::FromURL(url_));
112 // Make a note that this is a speculative resolve request. This allows us
113 // to separate it from real navigations in the observer's callback, and
114 // lets the HostResolver know it can de-prioritize it.
115 resolve_info.set_is_speculative(true);
116 return resolver_.Resolve(
118 net::DEFAULT_PRIORITY,
// The completion callback is bound to |this| with base::Unretained.
120 base::Bind(&LookupRequest::OnLookupFinished, base::Unretained(this)),
// Completion callback: forwards the URL and whether the result was net::OK
// to the owning predictor.
125 void OnLookupFinished(int result) {
126 predictor_->OnLookupFinished(this, url_, result == net::OK);
129 Predictor* predictor_; // The predictor which started us.
131 const GURL url_; // Hostname to resolve.
132 net::SingleRequestHostResolver resolver_;
133 net::AddressList addresses_;
135 DISALLOW_COPY_AND_ASSIGN(LookupRequest);
// Constructs a predictor with the given enable flags. The pointer members
// initialized here start out NULL, the lookup concurrency and queue-delay
// limits are taken from the file-level g_max_* values, and the next referrer
// trim is scheduled kDurationBetweenTrimmingsHours from now. Must be called
// on the UI thread (DCHECK'd).
138 Predictor::Predictor(bool preconnect_enabled, bool predictor_enabled)
139 : url_request_context_getter_(NULL),
140 predictor_enabled_(predictor_enabled),
142 profile_io_data_(NULL),
143 peak_pending_lookups_(0),
145 max_concurrent_dns_lookups_(g_max_parallel_resolves),
146 max_dns_queue_delay_(
147 TimeDelta::FromMilliseconds(g_max_queueing_delay_ms)),
148 host_resolver_(NULL),
149 transport_security_state_(NULL),
150 ssl_config_service_(NULL),
151 preconnect_enabled_(preconnect_enabled),
152 consecutive_omnibox_preconnect_count_(0),
153 next_trim_time_(base::TimeTicks::Now() +
154 TimeDelta::FromHours(kDurationBetweenTrimmingsHours)),
156 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
// Destructor. No thread check is performed yet; see the TODO below.
// NOTE(review): any statements in the destructor body are not visible in this
// view.
159 Predictor::~Predictor() {
160 // TODO(rlp): Add DCHECK for CurrentlyOn(BrowserThread::IO) when the
161 // ProfileManagerTest has been updated with a mock profile.
// Factory: returns a newly allocated SimplePredictor or Predictor, both
// constructed with the same two enable flags.
// NOTE(review): the condition that selects between the two returns is not
// visible in this view; presumably it tests |simple_shutdown| -- TODO confirm.
166 Predictor* Predictor::CreatePredictor(bool preconnect_enabled,
167 bool predictor_enabled,
168 bool simple_shutdown) {
170 return new SimplePredictor(preconnect_enabled, predictor_enabled);
171 return new Predictor(preconnect_enabled, predictor_enabled);
// Registers the two list prefs this file reads and writes (the startup list
// and the host referral list), both as UNSYNCABLE_PREF.
174 void Predictor::RegisterProfilePrefs(
175 user_prefs::PrefRegistrySyncable* registry) {
176 registry->RegisterListPref(prefs::kDnsPrefetchingStartupList,
177 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
178 registry->RegisterListPref(prefs::kDnsPrefetchingHostReferralList,
179 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
182 // --------------------- Start UI methods. ------------------------------------
// UI-thread half of initialization (DCHECK'd). Stores |user_prefs| and
// |getter|, computes the startup URL list, takes a deep copy of the persisted
// referral list, clears both prefs, and posts a task bound to
// FinalizeInitializationOnIOThread.
// NOTE(review): one parameter and some PostTask arguments are not visible in
// this view.
184 void Predictor::InitNetworkPredictor(PrefService* user_prefs,
185 PrefService* local_state,
187 net::URLRequestContextGetter* getter,
188 ProfileIOData* profile_io_data) {
189 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
191 user_prefs_ = user_prefs;
192 url_request_context_getter_ = getter;
194 // Gather the list of hostnames to prefetch on startup.
195 UrlList urls = GetPredictedUrlListAtStartup(user_prefs, local_state);
// Work on a deep copy, because the pref itself is cleared just below.
197 base::ListValue* referral_list =
198 static_cast<base::ListValue*>(user_prefs->GetList(
199 prefs::kDnsPrefetchingHostReferralList)->DeepCopy());
201 // Now that we have the statistics in memory, wipe them from the Preferences
202 // file. They will be serialized back on a clean shutdown. This way we only
203 // have to worry about clearing our in-memory state when Clearing Browsing
205 user_prefs->ClearPref(prefs::kDnsPrefetchingStartupList);
206 user_prefs->ClearPref(prefs::kDnsPrefetchingHostReferralList);
208 #if defined(OS_ANDROID) || defined(OS_IOS)
209 // TODO(marq): Once https://codereview.chromium.org/30883003/ lands, also
210 // condition this on DataReductionProxySettings::IsDataReductionProxyAllowed()
211 // Until then, we may create a proxy advisor when the proxy feature itself
212 // isn't available, and the advisor instance will never send advisory
213 // requests, which is slightly wasteful but not harmful.
214 if (data_reduction_proxy::DataReductionProxyParams::
215 IsIncludedInPreconnectHintingFieldTrial()) {
216 proxy_advisor_.reset(new ProxyAdvisor(user_prefs, getter));
// The posted callback binds |this| with base::Unretained.
220 BrowserThread::PostTask(
224 &Predictor::FinalizeInitializationOnIOThread,
225 base::Unretained(this),
227 io_thread, profile_io_data));
// Called on the UI thread (DCHECK'd) with a URL the omnibox is proposing.
// After the enable/validity/permission checks it tracks whether the host
// changed since the previous call, and then either preconnects (once enough
// consecutive preconnectable requests for the same host were seen, at most
// once per kMaxSearchKeepaliveSeconds) or posts a Resolve() task for the
// canonicalized URL, rate-limited by kMinPreresolveSeconds for a repeated
// host.
// NOTE(review): the statements guarded by the first three checks, and a few
// other lines, are not visible in this view.
230 void Predictor::AnticipateOmniboxUrl(const GURL& url, bool preconnectable) {
231 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
232 if (!predictor_enabled_)
234 if (!url.is_valid() || !url.has_host())
236 if (!CanPreresolveAndPreconnect())
239 std::string host = url.HostNoBrackets();
240 bool is_new_host_request = (host != last_omnibox_host_);
241 last_omnibox_host_ = host;
243 UrlInfo::ResolutionMotivation motivation(UrlInfo::OMNIBOX_MOTIVATED);
244 base::TimeTicks now = base::TimeTicks::Now();
246 if (preconnect_enabled_) {
247 if (preconnectable && !is_new_host_request) {
248 ++consecutive_omnibox_preconnect_count_;
249 // The omnibox suggests a search URL (for which we can preconnect) after
250 // one or two characters are typed, even though such typing often (1 in
251 // 3?) becomes a real URL. This code waits till it has more evidence of a
252 // preconnectable URL (search URL) before forming a preconnection, so as
253 // to reduce the useless preconnect rate.
254 // Perchance this logic should be pushed back into the omnibox, where the
255 // actual characters typed, such as a space, can better forecast whether
256 // we need to search/preconnect or not. By waiting for at least 4
257 // characters in a row that have led to a search proposal, we avoid
258 // preconnections for a prefix like "www." and we also wait until we have
259 // at least a 4 letter word to search for.
260 // Each character typed appears to induce 2 calls to
261 // AnticipateOmniboxUrl(), so we double 4 characters and limit at 8
263 // TODO(jar): Use an A/B test to optimize this.
264 const int kMinConsecutiveRequests = 8;
265 if (consecutive_omnibox_preconnect_count_ >= kMinConsecutiveRequests) {
266 // TODO(jar): Perhaps we should do a GET to leave the socket open in the
267 // pool. Currently, we just do a connect, which MAY be reset if we
268 // don't use it in 10 seconds!!! As a result, we may do more
269 // connections, and actually cost the server more than if we did a real
270 // get with a fake request (/gen_204 might be the good path on Google).
271 const int kMaxSearchKeepaliveSeconds(10);
272 if ((now - last_omnibox_preconnect_).InSeconds() <
273 kMaxSearchKeepaliveSeconds)
274 return; // We've done a preconnect recently.
275 last_omnibox_preconnect_ = now;
276 const int kConnectionsNeeded = 1;
278 CanonicalizeUrl(url), GURL(), motivation, kConnectionsNeeded);
279 return; // Skip pre-resolution, since we'll open a connection.
282 consecutive_omnibox_preconnect_count_ = 0;
286 // Fall through and consider pre-resolution.
288 // Omnibox tends to call in pairs (just a few milliseconds apart), and we
289 // really don't need to keep resolving a name that often.
290 // TODO(jar): A/B tests could check for perf impact of the early returns.
291 if (!is_new_host_request) {
292 const int kMinPreresolveSeconds(10);
293 if (kMinPreresolveSeconds > (now - last_omnibox_preresolve_).InSeconds())
296 last_omnibox_preresolve_ = now;
// Resolve() DCHECKs that it runs on the IO thread, hence the PostTask.
298 BrowserThread::PostTask(
301 base::Bind(&Predictor::Resolve, base::Unretained(this),
302 CanonicalizeUrl(url), motivation));
// Preconnects to |url| (one connection, EARLY_LOAD_MOTIVATED) and then asks
// for subresource predictions for the URL with its path stripped. May be
// called on the UI or the IO thread (DCHECK'd). The checks at the top require
// both enable flags, a valid URL with a host, and CanPreresolveAndPreconnect().
// NOTE(review): the statements guarded by those checks are not visible in
// this view.
305 void Predictor::PreconnectUrlAndSubresources(const GURL& url,
306 const GURL& first_party_for_cookies) {
307 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
308 BrowserThread::CurrentlyOn(BrowserThread::IO));
309 if (!predictor_enabled_ || !preconnect_enabled_ ||
310 !url.is_valid() || !url.has_host())
312 if (!CanPreresolveAndPreconnect())
315 UrlInfo::ResolutionMotivation motivation(UrlInfo::EARLY_LOAD_MOTIVATED);
316 const int kConnectionsNeeded = 1;
317 PreconnectUrl(CanonicalizeUrl(url), first_party_for_cookies,
318 motivation, kConnectionsNeeded);
319 PredictFrameSubresources(url.GetWithEmptyPath(), first_party_for_cookies);
// Builds the list of URLs to warm at startup (UI thread, DCHECK'd). Sources,
// in order: the persisted startup list in |user_prefs| (only when its first
// element is an integer equal to kPredictorStartupFormatVersion), then the
// HTTP/HTTPS URLs from the session-startup pref when its type is URLS, with
// paths stripped.
// NOTE(review): the declaration of |urls|, the per-entry GURL construction,
// the condition guarding the google.com entry and the return statement are
// not visible in this view.
322 UrlList Predictor::GetPredictedUrlListAtStartup(
323 PrefService* user_prefs,
324 PrefService* local_state) {
325 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
327 // Recall list of URLs we learned about during last session.
328 // This may catch secondary hostnames, pulled in by the homepages. It will
329 // also catch more of the "primary" home pages, since that was (presumably)
330 // rendered first (and will be rendered first this time too).
331 const base::ListValue* startup_list =
332 user_prefs->GetList(prefs::kDnsPrefetchingStartupList);
335 base::ListValue::const_iterator it = startup_list->begin();
336 int format_version = -1;
// The first list element carries the format version; any mismatch means the
// stored list is ignored.
337 if (it != startup_list->end() &&
338 (*it)->GetAsInteger(&format_version) &&
339 format_version == kPredictorStartupFormatVersion) {
341 for (; it != startup_list->end(); ++it) {
342 std::string url_spec;
343 if (!(*it)->GetAsString(&url_spec)) {
345 break; // Format incompatibility.
348 if (!url.has_host() || !url.has_scheme()) {
350 break; // Format incompatibility.
358 // Prepare for any static home page(s) the user has in prefs. The user may
359 // have a LOT of tabs specified, so we may as well try to warm them all.
360 SessionStartupPref tab_start_pref =
361 SessionStartupPref::GetStartupPref(user_prefs);
362 if (SessionStartupPref::URLS == tab_start_pref.type) {
363 for (size_t i = 0; i < tab_start_pref.urls.size(); i++) {
364 GURL gurl = tab_start_pref.urls[i];
365 if (!gurl.is_valid() || gurl.SchemeIsFile() || gurl.host().empty())
367 if (gurl.SchemeIsHTTPOrHTTPS())
368 urls.push_back(gurl.GetWithEmptyPath());
373 urls.push_back(GURL("http://www.google.com:80"));
// Overwrites the file-level queueing-delay limit (milliseconds) that the
// Predictor constructor reads. UI thread only (DCHECK'd).
378 void Predictor::set_max_queueing_delay(int max_queueing_delay_ms) {
379 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
380 g_max_queueing_delay_ms = max_queueing_delay_ms;
// Overwrites the file-level parallel-resolve limit that the Predictor
// constructor reads. UI thread only (DCHECK'd).
383 void Predictor::set_max_parallel_resolves(size_t max_parallel_resolves) {
384 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
385 g_max_parallel_resolves = max_parallel_resolves;
// UI-thread entry point for shutdown (DCHECK'd): posts a task bound to
// Predictor::Shutdown, which itself DCHECKs that it runs on the IO thread.
388 void Predictor::ShutdownOnUIThread() {
389 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
390 BrowserThread::PostTask(
393 base::Bind(&Predictor::Shutdown, base::Unretained(this)));
396 // ---------------------- End UI methods. -------------------------------------
398 // --------------------- Start IO methods. ------------------------------------
// IO-thread shutdown (DCHECK'd): deletes every element still held in
// pending_lookups_.
// NOTE(review): other statements of this function are not visible in this
// view.
400 void Predictor::Shutdown() {
401 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
405 STLDeleteElements(&pending_lookups_);
// Drops the accumulated resolution results on the IO thread (DCHECK'd).
// The work queue is drained by marking each queued URL as assigned and then
// not-found; entries that are still assigned (in flight) are marked
// pending-delete, set aside in |assignees|, and written back into results_
// at the end.
// NOTE(review): the declaration of |assignees|, the per-iteration |url|
// binding and the statement that empties results_ are not visible in this
// view.
408 void Predictor::DiscardAllResults() {
409 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
410 // Delete anything listed so far in this session that shows in about:dns.
414 // Try to delete anything in our work queue.
415 while (!work_queue_.IsEmpty()) {
416 // Emulate processing cycle as though host was not found.
417 GURL url = work_queue_.Pop();
418 UrlInfo* info = &results_[url];
419 DCHECK(info->HasUrl(url));
420 info->SetAssignedState();
421 info->SetNoSuchNameState();
423 // Now every results_ entry is either resolved, or is being resolved
424 // (see LookupRequest).
426 // Step through results_, recording names of all hosts that can't be erased.
427 // We can't erase anything being worked on.
429 for (Results::iterator it = results_.begin(); results_.end() != it; ++it) {
431 UrlInfo* info = &it->second;
432 DCHECK(info->HasUrl(url));
433 if (info->is_assigned()) {
434 info->SetPendingDeleteState();
435 assignees[url] = *info;
// In-flight entries are expected to be bounded by the concurrency limit.
438 DCHECK_LE(assignees.size(), max_concurrent_dns_lookups_);
440 // Put back in the names being worked on.
441 for (Results::iterator it = assignees.begin(); assignees.end() != it; ++it) {
442 DCHECK(it->second.is_marked_to_delete());
443 results_[it->first] = it->second;
447 // Overloaded Resolve() to take a vector of names.
// Appends every URL in |urls| to the resolution queue with the given
// motivation. IO thread only (DCHECK'd).
448 void Predictor::ResolveList(const UrlList& urls,
449 UrlInfo::ResolutionMotivation motivation) {
450 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
452 for (UrlList::const_iterator it = urls.begin(); it < urls.end(); ++it) {
453 AppendToResolutionQueue(*it, motivation);
457 // Basic Resolve() takes an individual name, and adds it
// to the resolution queue (via AppendToResolutionQueue). IO thread only
// (DCHECK'd).
// NOTE(review): any check between the DCHECK and the append is not visible in
// this view.
459 void Predictor::Resolve(const GURL& url,
460 UrlInfo::ResolutionMotivation motivation) {
461 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
464 AppendToResolutionQueue(url, motivation);
// Records that a navigation from |referring_url| led to |target_url| by
// calling SuggestHost() on the referrer's entry in referrers_ (operator[]
// creates the entry if it is absent). IO thread only (DCHECK'd). Both URLs
// are expected to be already canonicalized and non-empty (DCHECK'd).
// NOTE(review): the statement guarded by the first check and the trimming
// call after the final comment are not visible in this view.
467 void Predictor::LearnFromNavigation(const GURL& referring_url,
468 const GURL& target_url) {
469 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
470 if (!predictor_enabled_ || !CanPrefetchAndPrerender())
472 DCHECK_EQ(referring_url, Predictor::CanonicalizeUrl(referring_url));
473 DCHECK_NE(referring_url, GURL::EmptyGURL());
474 DCHECK_EQ(target_url, Predictor::CanonicalizeUrl(target_url));
475 DCHECK_NE(target_url, GURL::EmptyGURL());
477 referrers_[referring_url].SuggestHost(target_url);
478 // Possibly do some referrer trimming.
482 //-----------------------------------------------------------------------------
483 // This section supports the about:dns page.
// Appends a complete HTML document for the about:dns page to |output|
// (IO thread, DCHECK'd). When |predictor| is non-null, enabled and permitted
// by CanPrefetchAndPrerender(), the body comes from GetHtmlInfo().
// NOTE(review): the end of the first append() call and the `else` that
// presumably guards the "disabled" message are not visible in this view.
485 void Predictor::PredictorGetHtmlInfo(Predictor* predictor,
486 std::string* output) {
487 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
489 output->append("<html><head><title>About DNS</title>"
490 // We'd like the following no-cache... but it doesn't work.
491 // "<META HTTP-EQUIV=\"Pragma\" CONTENT=\"no-cache\">"
493 if (predictor && predictor->predictor_enabled() &&
494 predictor->CanPrefetchAndPrerender()) {
495 predictor->GetHtmlInfo(output);
497 output->append("DNS pre-resolution and TCP pre-connection is disabled.");
499 output->append("</body></html>");
502 // Provide sort order so all .com's are together, etc.
// Strict-weak-ordering functor over GURLs: compares the specs obtained after
// reversing the dot-separated components of each URL's host.
503 struct RightToLeftStringSorter {
504 bool operator()(const GURL& left, const GURL& right) const {
505 return ReverseComponents(left) < ReverseComponents(right);
509 // Transforms something like "http://www.google.com/xyz" to
510 // "http://com.google.www/xyz".
511 static std::string ReverseComponents(const GURL& url) {
512 // Reverse the components in the hostname.
513 std::vector<std::string> parts;
514 base::SplitString(url.host(), '.', &parts);
515 std::reverse(parts.begin(), parts.end());
516 std::string reversed_host = JoinString(parts, '.');
518 // Return the new URL.
// Only the host component is replaced; the spec of the resulting URL is
// returned.
519 GURL::Replacements url_components;
520 url_components.SetHostStr(reversed_host);
521 return url.ReplaceComponents(url_components).spec();
// Appends an HTML table describing referrers_ to |output| (IO thread,
// DCHECK'd). Referrer URLs are first collected into a set ordered by
// RightToLeftStringSorter; each referrer then contributes one row per
// predicted subresource, and the referrer columns span all of its rows via
// rowspan.
// NOTE(review): URL specs are written into the HTML with "%s" as-is; see the
// TODO(jar) below. Also, the statement guarded by the empty() check, the
// typedef name line and one StringAppendF argument are not visible in this
// view.
525 void Predictor::GetHtmlReferrerLists(std::string* output) {
526 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
527 if (referrers_.empty())
530 // TODO(jar): Remove any plausible JavaScript from names before displaying.
532 typedef std::set<GURL, struct RightToLeftStringSorter>
534 SortedNames sorted_names;
536 for (Referrers::iterator it = referrers_.begin();
537 referrers_.end() != it; ++it)
538 sorted_names.insert(it->first);
540 output->append("<br><table border>");
542 "<tr><th>Host for Page</th>"
543 "<th>Page Load<br>Count</th>"
544 "<th>Subresource<br>Navigations</th>"
545 "<th>Subresource<br>PreConnects</th>"
546 "<th>Subresource<br>PreResolves</th>"
547 "<th>Expected<br>Connects</th>"
548 "<th>Subresource Spec</th></tr>");
550 for (SortedNames::iterator it = sorted_names.begin();
551 sorted_names.end() != it; ++it) {
552 Referrer* referrer = &(referrers_[*it]);
// The two referrer-level cells are written only on the first subresource
// row.
553 bool first_set_of_futures = true;
554 for (Referrer::iterator future_url = referrer->begin();
555 future_url != referrer->end(); ++future_url) {
556 output->append("<tr align=right>");
557 if (first_set_of_futures) {
558 base::StringAppendF(output,
559 "<td rowspan=%d>%s</td><td rowspan=%d>%d</td>",
560 static_cast<int>(referrer->size()),
562 static_cast<int>(referrer->size()),
563 static_cast<int>(referrer->use_count()));
565 first_set_of_futures = false;
566 base::StringAppendF(output,
567 "<td>%d</td><td>%d</td><td>%d</td><td>%2.3f</td><td>%s</td></tr>",
568 static_cast<int>(future_url->second.navigation_count()),
569 static_cast<int>(future_url->second.preconnection_count()),
570 static_cast<int>(future_url->second.preresolution_count()),
571 static_cast<double>(future_url->second.subresource_use_rate()),
572 future_url->first.spec().c_str());
575 output->append("</table>");
// Appends this predictor's about:dns content to |output| (IO thread,
// DCHECK'd): the initial observer's first-resolution HTML (when an observer
// exists), the referrer table, and two UrlInfo tables -- names that were
// pre-resolved and names found not to exist. Entries that are neither are
// skipped as still in progress.
// NOTE(review): the declaration of |brief| and the `continue` after the
// not-found push_back are not visible in this view.
578 void Predictor::GetHtmlInfo(std::string* output) {
579 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
580 if (initial_observer_.get())
581 initial_observer_->GetFirstResolutionsHtml(output);
582 // Show list of subresource predictions and stats.
583 GetHtmlReferrerLists(output);
585 // Local lists for calling UrlInfo
586 UrlInfo::UrlInfoTable name_not_found;
587 UrlInfo::UrlInfoTable name_preresolved;
589 // Get copies of all useful data.
// The snapshot map is ordered by RightToLeftStringSorter.
590 typedef std::map<GURL, UrlInfo, RightToLeftStringSorter> SortedUrlInfo;
591 SortedUrlInfo snapshot;
592 // UrlInfo supports value semantics, so we can do a shallow copy.
593 for (Results::iterator it(results_.begin()); it != results_.end(); it++)
594 snapshot[it->first] = it->second;
596 // Partition the UrlInfo's into categories.
597 for (SortedUrlInfo::iterator it(snapshot.begin());
598 it != snapshot.end(); it++) {
599 if (it->second.was_nonexistent()) {
600 name_not_found.push_back(it->second);
603 if (!it->second.was_found())
604 continue; // Still being processed.
605 name_preresolved.push_back(it->second);
613 // Call for display of each table, along with title.
614 UrlInfo::GetHtmlTable(name_preresolved,
615 "Preresolution DNS records performed for ", brief, output);
616 UrlInfo::GetHtmlTable(name_not_found,
617 "Preresolving DNS records revealed non-existence for ", brief, output);
// Runs a complete referrer trim immediately on the IO thread (DCHECK'd).
// If no incremental trim is in progress (urls_being_trimmed_ is empty) the
// list of URLs to trim is loaded first; then IncrementalTrimReferrers(true)
// is called.
620 void Predictor::TrimReferrersNow() {
621 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
622 // Just finish up work if an incremental trim is in progress.
623 if (urls_being_trimmed_.empty())
624 LoadUrlsForTrimming();
625 IncrementalTrimReferrers(true); // Do everything now.
// Replaces the contents of |referral_list| with a serialized form of
// referrers_ (IO thread, DCHECK'd). Layout: element 0 is
// kPredictorReferrerVersion; every following element is a two-item list of
// [referrer URL spec, serialized subresource list]. DeserializeReferrers()
// reads the same layout.
// NOTE(review): the values allocated here are handed to Append() as raw
// pointers; presumably the list takes ownership -- TODO confirm.
628 void Predictor::SerializeReferrers(base::ListValue* referral_list) {
629 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
630 referral_list->Clear();
631 referral_list->Append(new base::FundamentalValue(kPredictorReferrerVersion));
632 for (Referrers::const_iterator it = referrers_.begin();
633 it != referrers_.end(); ++it) {
634 // Serialize the list of subresource names.
635 base::Value* subresource_list(it->second.Serialize());
637 // Create a list for each referrer.
638 base::ListValue* motivator(new base::ListValue);
639 motivator->Append(new base::StringValue(it->first.spec()));
640 motivator->Append(subresource_list);
642 referral_list->Append(motivator);
// Loads referrers_ from |referral_list| (IO thread, DCHECK'd). Nothing is
// read unless element 0 is an integer equal to kPredictorReferrerVersion.
// Each later element must be a list whose item 0 is the referrer URL spec
// and whose item 1 is the subresource data passed to Referrer::Deserialize().
// NOTE(review): the handling of malformed entries (the bodies of the three
// failure checks) is not visible in this view.
646 void Predictor::DeserializeReferrers(const base::ListValue& referral_list) {
647 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
648 int format_version = -1;
649 if (referral_list.GetSize() > 0 &&
650 referral_list.GetInteger(0, &format_version) &&
651 format_version == kPredictorReferrerVersion) {
// Start at 1: index 0 holds the format version checked above.
652 for (size_t i = 1; i < referral_list.GetSize(); ++i) {
653 const base::ListValue* motivator;
654 if (!referral_list.GetList(i, &motivator)) {
658 std::string motivating_url_spec;
659 if (!motivator->GetString(0, &motivating_url_spec)) {
664 const base::Value* subresource_list;
665 if (!motivator->Get(1, &subresource_list)) {
670 referrers_[GURL(motivating_url_spec)].Deserialize(*subresource_list);
// Deserializes |referral_list| into referrers_ and then deletes the list;
// the caller hands over ownership of |referral_list|.
675 void Predictor::DeserializeReferrersThenDelete(
676 base::ListValue* referral_list) {
677 DeserializeReferrers(*referral_list);
678 delete referral_list;
// Forwards to the initial observer's DiscardInitialNavigationHistory() when
// an observer exists. IO thread only (DCHECK'd).
681 void Predictor::DiscardInitialNavigationHistory() {
682 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
683 if (initial_observer_.get())
684 initial_observer_->DiscardInitialNavigationHistory();
// IO-thread half of initialization (DCHECK'd). Stores |profile_io_data|,
// creates the initial observer, caches the host resolver, transport security
// state and SSL config service, creates the weak pointer factory, starts
// prefetching |startup_urls|, and finally deserializes and deletes
// |referral_list|.
// NOTE(review): one parameter (the source of |io_thread|) is not visible in
// this view.
687 void Predictor::FinalizeInitializationOnIOThread(
688 const UrlList& startup_urls,
689 base::ListValue* referral_list,
691 ProfileIOData* profile_io_data) {
692 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
694 profile_io_data_ = profile_io_data;
695 initial_observer_.reset(new InitialObserver());
696 host_resolver_ = io_thread->globals()->host_resolver.get();
698 net::URLRequestContext* context =
699 url_request_context_getter_->GetURLRequestContext();
700 transport_security_state_ = context->transport_security_state();
701 ssl_config_service_ = context->ssl_config_service();
703 // base::WeakPtrFactory instances need to be created and destroyed
704 // on the same thread. The predictor lives on the IO thread and will die
705 // from there so now that we're on the IO thread we need to properly
706 // initialize the base::WeakPtrFactory.
707 // TODO(groby): Check if WeakPtrFactory has the same constraint.
708 weak_factory_.reset(new base::WeakPtrFactory<Predictor>(this));
710 // Prefetch these hostnames on startup.
711 DnsPrefetchMotivatedList(startup_urls, UrlInfo::STARTUP_LIST_MOTIVATED);
// Takes ownership of |referral_list|: it is deleted after deserialization.
712 DeserializeReferrersThenDelete(referral_list);
715 //-----------------------------------------------------------------------------
716 // This section intermingles prefetch results with actual browser HTTP
717 // network activity. It supports calculating of the benefit of a prefetch, as
718 // well as recording what prefetched hostname resolutions might be potentially
719 // helpful during the next chrome-startup.
720 //-----------------------------------------------------------------------------
// Hands |url| to the initial observer via Append(). IO thread only
// (DCHECK'd). The check at the top tests that the predictor is enabled, that
// an observer exists and that CanPrefetchAndPrerender() allows it.
// NOTE(review): the statement guarded by that check is not visible in this
// view.
722 void Predictor::LearnAboutInitialNavigation(const GURL& url) {
723 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
724 if (!predictor_enabled_ || NULL == initial_observer_.get() ||
725 !CanPrefetchAndPrerender()) {
728 initial_observer_->Append(url, this);
731 // This API is only used in the browser process.
732 // It is called from an IPC message originating in the renderer. It currently
733 // includes both Page-Scan, and Link-Hover prefetching.
734 // TODO(jar): Separate out link-hover prefetching, and page-scan results.
// Each hostname is turned into the URL "http://<hostname>:80" and the
// resulting list is prefetched with PAGE_SCAN_MOTIVATED. IO thread only
// (DCHECK'd).
// NOTE(review): the declaration of |urls| and the loop increment are not
// visible in this view.
735 void Predictor::DnsPrefetchList(const NameList& hostnames) {
736 // TODO(jar): Push GURL transport further back into renderer, but this will
737 // require a Webkit change in the observer :-/.
739 for (NameList::const_iterator it = hostnames.begin();
740 it < hostnames.end();
742 urls.push_back(GURL("http://" + *it + ":80"));
745 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
746 DnsPrefetchMotivatedList(urls, UrlInfo::PAGE_SCAN_MOTIVATED);
// Queues |urls| for resolution with |motivation|. Callable from the UI or IO
// thread (DCHECK'd): on the IO thread ResolveList() is called directly;
// otherwise a task bound to ResolveList is posted. The checks at the top test
// predictor_enabled_ and CanPrefetchAndPrerender().
// NOTE(review): the |urls| parameter line, the statements guarded by those
// checks, the `else` and the remaining PostTask arguments are not visible in
// this view.
749 void Predictor::DnsPrefetchMotivatedList(
751 UrlInfo::ResolutionMotivation motivation) {
752 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
753 BrowserThread::CurrentlyOn(BrowserThread::IO));
754 if (!predictor_enabled_)
756 if (!CanPrefetchAndPrerender())
759 if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
760 ResolveList(urls, motivation);
762 BrowserThread::PostTask(
765 base::Bind(&Predictor::ResolveList, base::Unretained(this),
770 //-----------------------------------------------------------------------------
771 // Functions to handle saving of hostnames from one session to the next, to
772 // expedite startup times.
// File-local IO-thread trampoline (DCHECK'd). With a NULL |predictor| it just
// signals |completion|; otherwise it forwards the two lists and the event to
// Predictor::SaveDnsPrefetchStateForNextStartupAndTrim().
// NOTE(review): the statement following Signal() in the NULL branch is not
// visible in this view.
774 static void SaveDnsPrefetchStateForNextStartupAndTrimOnIOThread(
775 base::ListValue* startup_list,
776 base::ListValue* referral_list,
777 base::WaitableEvent* completion,
778 Predictor* predictor) {
779 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
781 if (NULL == predictor) {
782 completion->Signal();
785 predictor->SaveDnsPrefetchStateForNextStartupAndTrim(
786 startup_list, referral_list, completion);
// Persists the startup list and referral list into user_prefs_ through two
// ListPrefUpdate objects. When already on the IO thread the save helper is
// invoked directly; otherwise it is posted as a task, and a ScopedAllowWait
// is then created for waiting on |completion|.
// NOTE(review): the statements guarded by the two checks at the top, the
// remaining call/PostTask arguments, the handling of |posted| and the wait
// itself are not visible in this view.
789 void Predictor::SaveStateForNextStartupAndTrim() {
790 if (!predictor_enabled_)
792 if (!CanPrefetchAndPrerender())
795 base::WaitableEvent completion(true, false);
797 ListPrefUpdate update_startup_list(user_prefs_,
798 prefs::kDnsPrefetchingStartupList);
799 ListPrefUpdate update_referral_list(user_prefs_,
800 prefs::kDnsPrefetchingHostReferralList);
801 if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
802 SaveDnsPrefetchStateForNextStartupAndTrimOnIOThread(
803 update_startup_list.Get(),
804 update_referral_list.Get(),
808 bool posted = BrowserThread::PostTask(
812 &SaveDnsPrefetchStateForNextStartupAndTrimOnIOThread,
813 update_startup_list.Get(),
814 update_referral_list.Get(),
818 // TODO(jar): Synchronous waiting for the IO thread is a potential source
819 // of deadlocks and should be investigated. See http://crbug.com/78451.
822 // http://crbug.com/124954
823 base::ThreadRestrictions::ScopedAllowWait allow_wait;
// IO-thread worker (DCHECK'd) behind SaveStateForNextStartupAndTrim(): fills
// |startup_list| from the initial observer (when one exists), serializes
// referrers_ into |referral_list|, and signals |completion| last.
// NOTE(review): the trim call announced by the comment below is not visible
// in this view.
829 void Predictor::SaveDnsPrefetchStateForNextStartupAndTrim(
830 base::ListValue* startup_list,
831 base::ListValue* referral_list,
832 base::WaitableEvent* completion) {
833 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
834 if (initial_observer_.get())
835 initial_observer_->GetInitialDnsResolutionList(startup_list);
837 // Do at least one trim at shutdown, in case the user wasn't running long
838 // enough to do any regular trimming of referrers.
840 SerializeReferrers(referral_list);
842 completion->Signal();
// Thread-hopping wrapper around PreconnectUrlOnIOThread(). Callable from the
// UI or IO thread (DCHECK'd): on the IO thread the work is done inline;
// otherwise a task bound to PreconnectUrlOnIOThread is posted.
// NOTE(review): the |count| parameter line, the `else` and the remaining
// PostTask arguments are not visible in this view.
845 void Predictor::PreconnectUrl(const GURL& url,
846 const GURL& first_party_for_cookies,
847 UrlInfo::ResolutionMotivation motivation,
849 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
850 BrowserThread::CurrentlyOn(BrowserThread::IO));
852 if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
853 PreconnectUrlOnIOThread(url, first_party_for_cookies, motivation, count);
855 BrowserThread::PostTask(
858 base::Bind(&Predictor::PreconnectUrlOnIOThread,
859 base::Unretained(this), url, first_party_for_cookies,
// Performs a preconnect for |original_url|: the URL is first passed through
// GetHSTSRedirectOnIOThread(), then the proxy advisor is informed, the
// observer is notified, and PreconnectOnIOThread() is called with the
// resulting URL.
// NOTE(review): the |count| parameter line, the condition guarding the
// observer_ call and two PreconnectOnIOThread() arguments are not visible in
// this view.
864 void Predictor::PreconnectUrlOnIOThread(
865 const GURL& original_url,
866 const GURL& first_party_for_cookies,
867 UrlInfo::ResolutionMotivation motivation,
869 // Skip the HSTS redirect.
870 GURL url = GetHSTSRedirectOnIOThread(original_url);
872 AdviseProxy(url, motivation, true /* is_preconnect */);
875 observer_->OnPreconnectUrl(
876 url, first_party_for_cookies, motivation, count);
879 PreconnectOnIOThread(url,
880 first_party_for_cookies,
883 url_request_context_getter_.get());
// Triggers subresource prediction for |url|, which must already have an
// empty path (DCHECK'd). Callable from the UI or IO thread (DCHECK'd): on the
// IO thread PrepareFrameSubresources() runs inline; otherwise it is posted as
// a task. The checks at the top test predictor_enabled_ and
// CanPrefetchAndPrerender().
// NOTE(review): the statements guarded by those checks and the `else` are
// not visible in this view.
886 void Predictor::PredictFrameSubresources(const GURL& url,
887 const GURL& first_party_for_cookies) {
888 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
889 BrowserThread::CurrentlyOn(BrowserThread::IO));
890 if (!predictor_enabled_)
892 if (!CanPrefetchAndPrerender())
894 DCHECK_EQ(url.GetWithEmptyPath(), url);
895 // Add one pass through the message loop to allow current navigation to
897 if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
898 PrepareFrameSubresources(url, first_party_for_cookies);
900 BrowserThread::PostTask(
903 base::Bind(&Predictor::PrepareFrameSubresources,
904 base::Unretained(this), url, first_party_for_cookies));
// Thread-hopping wrapper around AdviseProxyOnIOThread(). Callable from the UI
// or IO thread (DCHECK'd): on the IO thread the call is made inline;
// otherwise it is posted as a task.
// NOTE(review): lines between the signature and the DCHECK, and the `else`,
// are not visible in this view.
908 void Predictor::AdviseProxy(const GURL& url,
909 UrlInfo::ResolutionMotivation motivation,
910 bool is_preconnect) {
914 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
915 BrowserThread::CurrentlyOn(BrowserThread::IO));
917 if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
918 AdviseProxyOnIOThread(url, motivation, is_preconnect);
920 BrowserThread::PostTask(
923 base::Bind(&Predictor::AdviseProxyOnIOThread,
924 base::Unretained(this), url, motivation, is_preconnect));
// Returns whether prefetching/prerendering is currently allowed. On the UI
// thread the answer comes from CanPrefetchAndPrerenderUI(user_prefs_); the
// other path DCHECKs the IO thread and uses
// CanPrefetchAndPrerenderIO(profile_io_data_).
928 bool Predictor::CanPrefetchAndPrerender() const {
929 if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
930 return chrome_browser_net::CanPrefetchAndPrerenderUI(user_prefs_);
932 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
933 return chrome_browser_net::CanPrefetchAndPrerenderIO(profile_io_data_);
// Returns whether pre-resolution/preconnection is currently allowed. On the
// UI thread the answer comes from CanPreresolveAndPreconnectUI(user_prefs_);
// the other path DCHECKs the IO thread and uses
// CanPreresolveAndPreconnectIO(profile_io_data_).
937 bool Predictor::CanPreresolveAndPreconnect() const {
938 if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
939 return chrome_browser_net::CanPreresolveAndPreconnectUI(user_prefs_);
941 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
942 return chrome_browser_net::CanPreresolveAndPreconnectIO(profile_io_data_);
// Classification assigned to each predicted subresource in
// PrepareFrameSubresources().
// NOTE(review): only the SUBRESOURCE_VALUE_MAX enumerator is visible in this
// view; PrepareFrameSubresources() also uses TOO_NEW, PRECONNECTION and
// PRERESOLUTION.
946 enum SubresourceValue {
950 SUBRESOURCE_VALUE_MAX
// Acts on what has been learned about |original_url| as a referrer. Runs on
// the IO thread only (DCHECKed below).
//  - The url is first passed through GetHSTSRedirectOnIOThread(); the result
//    is the key used to search |referrers_|.
//  - No entry found: when |preconnect_enabled_| is set,
//    PreconnectUrlOnIOThread() is called for the url itself with
//    SELF_REFERAL_MOTIVATED and a count of 2.
//  - Entry found: its use count is incremented and every recorded future url
//    is scored by subresource_use_rate(). A score above
//    kPreconnectWorthyExpectedValue (with preconnect enabled) leads to a
//    preconnect; otherwise a score above kDNSPreresolutionWorthyExpectedValue
//    appends the url to the DNS resolution queue. Each decision is reported
//    to the "Net.PreconnectSubresourceEval" histogram.
// NOTE(review): several lines of this function (closing braces, the end of the
// "no entry" branch, some call and histogram arguments) are not visible in
// this listing.
953 void Predictor::PrepareFrameSubresources(const GURL& original_url,
954 const GURL& first_party_for_cookies) {
955 // Apply HSTS redirect early so it is taken into account when looking up
957 GURL url = GetHSTSRedirectOnIOThread(original_url);
959 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
960 DCHECK_EQ(url.GetWithEmptyPath(), url);
961 Referrers::iterator it = referrers_.find(url);
962 if (referrers_.end() == it) {
963 // Only when we don't know anything about this url, make 2 connections
964 // available. We could do this completely via learning (by prepopulating
965 // the referrer_ list with this expected value), but it would swell the
966 // size of the list with all the "Leaf" nodes in the tree (nodes that don't
967 // load any subresources). If we learn about this resource, we will instead
968 // provide a more carefully estimated preconnection count.
969 if (preconnect_enabled_) {
970 PreconnectUrlOnIOThread(url, first_party_for_cookies,
971 UrlInfo::SELF_REFERAL_MOTIVATED, 2);
// From here on |it| is dereferenced, i.e. the referrer is a known one.
976 Referrer* referrer = &(it->second);
977 referrer->IncrementUseCount();
978 const UrlInfo::ResolutionMotivation motivation =
979 UrlInfo::LEARNED_REFERAL_MOTIVATED;
980 for (Referrer::iterator future_url = referrer->begin();
981 future_url != referrer->end(); ++future_url) {
// TOO_NEW is the default bucket; it is kept unless one of the two branches
// below overrides it.
982 SubresourceValue evalution(TOO_NEW);
983 double connection_expectation = future_url->second.subresource_use_rate();
// The expectation is scaled by 100 before being recorded as an integer count.
984 UMA_HISTOGRAM_CUSTOM_COUNTS("Net.PreconnectSubresourceExpectation",
985 static_cast<int>(connection_expectation * 100),
987 future_url->second.ReferrerWasObserved();
988 if (preconnect_enabled_ &&
989 connection_expectation > kPreconnectWorthyExpectedValue) {
990 evalution = PRECONNECTION;
991 future_url->second.IncrementPreconnectionCount();
// Round the expectation up to a whole number of connections.
992 int count = static_cast<int>(std::ceil(connection_expectation));
// NOTE(review): the statement guarded by this same-host check is not visible
// in this listing.
993 if (url.host() == future_url->first.host())
995 PreconnectUrlOnIOThread(future_url->first, first_party_for_cookies,
997 } else if (connection_expectation > kDNSPreresolutionWorthyExpectedValue) {
998 evalution = PRERESOLUTION;
999 future_url->second.preresolution_increment();
1000 UrlInfo* queued_info = AppendToResolutionQueue(future_url->first,
1003 queued_info->SetReferringHostname(url);
1005 UMA_HISTOGRAM_ENUMERATION("Net.PreconnectSubresourceEval", evalution,
1006 SUBRESOURCE_VALUE_MAX);
// Completion handler for a lookup |request| on |url|; IO thread only
// (DCHECKed below). Records the outcome through LookupFinished(), removes
// |request| from |pending_lookups_|, and then calls
// StartSomeQueuedResolutions() so queued work can use the freed slot.
// NOTE(review): the last parameter of the signature and at least one statement
// between the erase() and the final call are not visible in this listing.
1010 void Predictor::OnLookupFinished(LookupRequest* request, const GURL& url,
1012 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1014 LookupFinished(request, url, found);
1015 pending_lookups_.erase(request);
1018 StartSomeQueuedResolutions();
// Records the outcome of a lookup on the UrlInfo stored under |url| in
// |results_|; IO thread only (DCHECKed below). An entry that is marked for
// deletion is erased from |results_|; otherwise its state is set through
// SetFoundState() or SetNoSuchNameState().
// NOTE(review): the last parameter of the signature and the conditions that
// select between the two state setters are not visible in this listing.
1021 void Predictor::LookupFinished(LookupRequest* request, const GURL& url,
1023 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1024 UrlInfo* info = &results_[url];
1025 DCHECK(info->HasUrl(url));
1026 if (info->is_marked_to_delete()) {
1027 results_.erase(url);
1030 info->SetFoundState();
1032 info->SetNoSuchNameState();
// Queues |url| for DNS resolution with the given |motivation|; IO thread only
// (DCHECKed below), and |url| must have a host. Visible steps:
//  1. Fetch the UrlInfo for |url| from |results_| and bind it to the url.
//  2. If the info does not need a DNS update, only log
//     "DNS PrefetchNotUpdated".
//  3. Notify the proxy advisor via AdviseProxy() (is_preconnect = false); if
//     a proxy advisor exists and would proxy the url, only log
//     "DNS PrefetchForProxiedRequest".
//  4. Otherwise mark the info as queued, push the url onto |work_queue_| and
//     call StartSomeQueuedResolutions().
// Returns a UrlInfo*.
// NOTE(review): the url parameter line of the signature and every return
// statement are not visible in this listing, so the exact return values
// (including whether the result can be null) must be confirmed against the
// full file.
1036 UrlInfo* Predictor::AppendToResolutionQueue(
1038 UrlInfo::ResolutionMotivation motivation) {
1039 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1040 DCHECK(url.has_host());
1045 UrlInfo* info = &results_[url];
1046 info->SetUrl(url); // Initialize or DCHECK.
1047 // TODO(jar): I need to discard names that have long since expired.
1048 // Currently we only add to the domain map :-/
1050 DCHECK(info->HasUrl(url));
1052 if (!info->NeedsDnsUpdate()) {
1053 info->DLogResultsStats("DNS PrefetchNotUpdated");
1057 AdviseProxy(url, motivation, false /* is_preconnect */);
1058 if (proxy_advisor_ && proxy_advisor_->WouldProxyURL(url)) {
1059 info->DLogResultsStats("DNS PrefetchForProxiedRequest");
1063 info->SetQueuedState(motivation);
1064 work_queue_.Push(url, motivation);
1065 StartSomeQueuedResolutions();
// Congestion check for a UrlInfo that has just been taken off the queue; IO
// thread only (DCHECKed below). Compares |info|'s queue_duration() with
// |max_dns_queue_delay_|; when the wait was too long, |info| is removed from
// the queue and further entries are popped from |work_queue_| and put into the
// assigned state. The caller in StartSomeQueuedResolutions() DCHECKs that
// |work_queue_| is empty whenever this function returns true.
// NOTE(review): the return statements and the loop construct that presumably
// wraps the drain sequence are not visible in this listing.
1069 bool Predictor::CongestionControlPerformed(UrlInfo* info) {
1070 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1071 // Note: queue_duration is ONLY valid after we go to assigned state.
1072 if (info->queue_duration() < max_dns_queue_delay_)
1074 // We need to discard all entries in our queue, as we're keeping them waiting
1075 // too long. By doing this, we'll have a chance to quickly service urgent
1076 // resolutions, and not have a bogged down system.
1078 info->RemoveFromQueue();
1079 if (work_queue_.IsEmpty())
1081 info = &results_[work_queue_.Pop()];
1082 info->SetAssignedState();
// Starts lookups for queued urls; IO thread only (DCHECKed below). Keeps
// popping from |work_queue_| while it is non-empty and fewer than
// |max_concurrent_dns_lookups_| lookups are pending. Each popped url is marked
// assigned, run through CongestionControlPerformed(), and then handed to a new
// LookupRequest. A request that returns net::ERR_IO_PENDING is tracked in
// |pending_lookups_| (also updating |peak_pending_lookups_|); any other status
// is reported to LookupFinished() right away, with net::OK meaning "found".
// NOTE(review): closing braces, the else branch header and the statements that
// follow the congestion DCHECK and the synchronous LookupFinished() call are
// not visible in this listing.
1087 void Predictor::StartSomeQueuedResolutions() {
1088 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1090 while (!work_queue_.IsEmpty() &&
1091 pending_lookups_.size() < max_concurrent_dns_lookups_) {
1092 const GURL url(work_queue_.Pop());
1093 UrlInfo* info = &results_[url];
1094 DCHECK(info->HasUrl(url));
1095 info->SetAssignedState();
1097 if (CongestionControlPerformed(info)) {
1098 DCHECK(work_queue_.IsEmpty());
// NOTE(review): |request| is a raw pointer obtained from new. It is stored in
// |pending_lookups_| when the lookup is pending; how it is released on the
// synchronous path is not visible in this listing -- confirm.
1102 LookupRequest* request = new LookupRequest(this, host_resolver_, url);
1103 int status = request->Start();
1104 if (status == net::ERR_IO_PENDING) {
1105 // Will complete asynchronously.
1106 pending_lookups_.insert(request);
1107 peak_pending_lookups_ = std::max(peak_pending_lookups_,
1108 pending_lookups_.size());
1110 // Completed synchronously (was already cached by HostResolver), or else
1111 // there was (equivalently) some network error that prevents us from
1112 // finding the name. Status net::OK means it was "found."
1113 LookupFinished(request, url, status == net::OK);
// Starts a new round of incremental referrer trimming; IO thread only
// (DCHECKed below). Does nothing while a previous round still has urls left in
// |urls_being_trimmed_|. Otherwise compares the current time against
// |next_trim_time_|, pushes |next_trim_time_| forward by
// kDurationBetweenTrimmingsHours hours, snapshots the referrer urls via
// LoadUrlsForTrimming() and schedules the first increment via
// PostIncrementalTrimTask().
// NOTE(review): the statement guarded by the "now < next_trim_time_" check is
// not visible in this listing; presumably an early return -- confirm.
1119 void Predictor::TrimReferrers() {
1120 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1121 if (!urls_being_trimmed_.empty())
1122 return; // There is incremental trimming in progress already.
1124 // Check to see if it is time to trim yet.
1125 base::TimeTicks now = base::TimeTicks::Now();
1126 if (now < next_trim_time_)
1128 next_trim_time_ = now + TimeDelta::FromHours(kDurationBetweenTrimmingsHours);
1130 LoadUrlsForTrimming();
1131 PostIncrementalTrimTask();
// Copies the key of every entry in |referrers_| into |urls_being_trimmed_|,
// which must be empty on entry (DCHECKed), and records the resulting size in
// the "Net.PredictionTrimSize" histogram.
1134 void Predictor::LoadUrlsForTrimming() {
1135 DCHECK(urls_being_trimmed_.empty());
1136 for (Referrers::const_iterator it = referrers_.begin();
1137 it != referrers_.end(); ++it)
1138 urls_being_trimmed_.push_back(it->first);
1139 UMA_HISTOGRAM_COUNTS("Net.PredictionTrimSize", urls_being_trimmed_.size());
// Schedules the next trimming increment on the current message loop:
// IncrementalTrimReferrers(false) is posted with a delay of
// kDurationBetweenTrimmingIncrementsSeconds seconds. The callback is bound
// through a weak pointer from |weak_factory_| rather than a raw |this|.
// NOTE(review): the statement guarded by the empty() check (presumably an
// early return when nothing is left to trim) and the PostDelayedTask location
// argument are not visible in this listing.
1142 void Predictor::PostIncrementalTrimTask() {
1143 if (urls_being_trimmed_.empty())
1145 const TimeDelta kDurationBetweenTrimmingIncrements =
1146 TimeDelta::FromSeconds(kDurationBetweenTrimmingIncrementsSeconds);
1147 base::MessageLoop::current()->PostDelayedTask(
1149 base::Bind(&Predictor::IncrementalTrimReferrers,
1150 weak_factory_->GetWeakPtr(), false),
1151 kDurationBetweenTrimmingIncrements);
// Processes one batch of urls from the back of |urls_being_trimmed_|. For each
// url still present in |referrers_|, Referrer::Trim() is applied with
// kReferrerTrimRatio and kDiscardableExpectedValue; the referrer is erased
// when Trim() returns false. Afterwards PostIncrementalTrimTask() is called to
// schedule any remaining work.
// NOTE(review): the use of |trim_all_now| is not visible in this listing;
// presumably it decides whether the batch is capped at
// kUrlsTrimmedPerIncrement -- confirm.
1154 void Predictor::IncrementalTrimReferrers(bool trim_all_now) {
1155 size_t trim_count = urls_being_trimmed_.size();
1157 trim_count = std::min(trim_count, kUrlsTrimmedPerIncrement);
1158 while (trim_count-- != 0) {
// The url is popped before the lookup result is examined, so it is consumed
// from the pending list whether or not a referrer entry still exists.
1159 Referrers::iterator it = referrers_.find(urls_being_trimmed_.back());
1160 urls_being_trimmed_.pop_back();
1161 if (it == referrers_.end())
1162 continue; // Defensive code: It got trimmed away already.
1163 if (!it->second.Trim(kReferrerTrimRatio, kDiscardableExpectedValue))
1164 referrers_.erase(it);
1166 PostIncrementalTrimTask();
// IO-thread half of AdviseProxy(): forwards the hint to
// |proxy_advisor_|->Advise(). The IO-thread DCHECK sits after the
// |proxy_advisor_| null check.
// NOTE(review): the statement guarded by the null check is not visible in this
// listing; presumably an early return -- confirm.
1169 void Predictor::AdviseProxyOnIOThread(const GURL& url,
1170 UrlInfo::ResolutionMotivation motivation,
1171 bool is_preconnect) {
1172 if (!proxy_advisor_)
1174 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1175 proxy_advisor_->Advise(url, motivation, is_preconnect);
// Computes the url to use for |url| once HSTS is taken into account; IO thread
// only (DCHECKed below). Three conditions are checked in order: a
// |transport_security_state_| exists, the scheme is "http", and
// ShouldUpgradeToSSL() approves the host (given SNI availability from
// |ssl_config_service_|). The final return yields a copy of |url| whose scheme
// is replaced by "https".
// NOTE(review): the statements guarded by the three checks are not visible in
// this listing; presumably each returns |url| unchanged -- confirm.
1178 GURL Predictor::GetHSTSRedirectOnIOThread(const GURL& url) {
1179 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1181 if (!transport_security_state_)
1183 if (!url.SchemeIs("http"))
1185 bool sni_available =
1186 net::SSLConfigService::IsSNIAvailable(ssl_config_service_);
1187 if (!transport_security_state_->ShouldUpgradeToSSL(url.host(), sni_available))
// Only the scheme component is replaced; the component spans all of
// kNewScheme (offset 0, length strlen(kNewScheme)).
1190 url::Replacements<char> replacements;
1191 const char kNewScheme[] = "https";
1192 replacements.SetScheme(kNewScheme, url::Component(0, strlen(kNewScheme)));
1193 return url.ReplaceComponents(replacements);
1196 // ---------------------- End IO methods. -------------------------------------
1198 //-----------------------------------------------------------------------------
// HostNameQueue constructor; takes no arguments.
// NOTE(review): the body is not visible in this listing.
1200 Predictor::HostNameQueue::HostNameQueue() {
// HostNameQueue destructor.
// NOTE(review): the body is not visible in this listing.
1203 Predictor::HostNameQueue::~HostNameQueue() {
// Enqueues |url| on one of the two internal queues, chosen by |motivation|:
// STATIC_REFERAL_MOTIVATED, LEARNED_REFERAL_MOTIVATED and MOUSE_OVER_MOTIVATED
// go to |rush_queue_|, which Pop() prefers while it is non-empty.
// NOTE(review): the break statements and the label above the
// |background_queue_| push are not visible in this listing; presumably that
// push is the default case -- confirm.
1206 void Predictor::HostNameQueue::Push(const GURL& url,
1207 UrlInfo::ResolutionMotivation motivation) {
1208 switch (motivation) {
1209 case UrlInfo::STATIC_REFERAL_MOTIVATED:
1210 case UrlInfo::LEARNED_REFERAL_MOTIVATED:
1211 case UrlInfo::MOUSE_OVER_MOTIVATED:
1212 rush_queue_.push(url);
1216 background_queue_.push(url);
// Returns true only when both |rush_queue_| and |background_queue_| are empty.
1221 bool Predictor::HostNameQueue::IsEmpty() const {
1222 return rush_queue_.empty() && background_queue_.empty();
// Takes the next url to resolve. |background_queue_| is selected when
// |rush_queue_| is empty, and the front element of the selected queue is
// copied into the result.
// NOTE(review): the second operand of the conditional (presumably
// &rush_queue_), the removal of the front element and the return statement are
// not visible in this listing. front() on an empty std::queue is undefined, so
// callers presumably check IsEmpty() first -- confirm.
1225 GURL Predictor::HostNameQueue::Pop() {
1227 std::queue<GURL> *queue(rush_queue_.empty() ? &background_queue_
1229 GURL url(queue->front());
1234 //-----------------------------------------------------------------------------
1235 // Member definitions for InitialObserver class.
// InitialObserver constructor; takes no arguments.
// NOTE(review): the body is not visible in this listing.
1237 Predictor::InitialObserver::InitialObserver() {
// InitialObserver destructor.
// NOTE(review): the body is not visible in this listing.
1240 Predictor::InitialObserver::~InitialObserver() {
// Records |url| in |first_navigations_| together with the current time, unless
// it is already present (the timestamp of an existing entry is left alone).
// IO thread only (DCHECKed below). |url| must be http/https and already in
// canonical form (both DCHECKed). Two checks precede the insert: |predictor|
// being NULL, and |first_navigations_| having reached
// kStartupResolutionCount entries.
// NOTE(review): the statements guarded by those two checks are not visible in
// this listing; presumably early returns -- confirm.
1243 void Predictor::InitialObserver::Append(const GURL& url,
1244 Predictor* predictor) {
1245 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1247 // TODO(rlp): Do we really need the predictor check here?
1248 if (NULL == predictor)
1250 if (kStartupResolutionCount <= first_navigations_.size())
1253 DCHECK(url.SchemeIsHTTPOrHTTPS());
1254 DCHECK_EQ(url, Predictor::CanonicalizeUrl(url));
1255 if (first_navigations_.find(url) == first_navigations_.end())
1256 first_navigations_[url] = base::TimeTicks::Now();
// Rebuilds |startup_list| from |first_navigations_|; IO thread only (DCHECKed
// below). |startup_list| must be non-null (DCHECKed); it is cleared first,
// then filled with kPredictorStartupFormatVersion as the first element,
// followed by the spec() string of every recorded url.
// NOTE(review): the loop's increment expression is not visible in this
// listing.
1259 void Predictor::InitialObserver::GetInitialDnsResolutionList(
1260 base::ListValue* startup_list) {
1261 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1262 DCHECK(startup_list);
1263 startup_list->Clear();
1264 DCHECK_EQ(0u, startup_list->GetSize());
1265 startup_list->Append(
1266 new base::FundamentalValue(kPredictorStartupFormatVersion));
1267 for (FirstNavigations::iterator it = first_navigations_.begin();
1268 it != first_navigations_.end();
// Stored urls are expected to be in canonical form already.
1270 DCHECK(it->first == Predictor::CanonicalizeUrl(it->first));
1271 startup_list->Append(new base::StringValue(it->first.spec()));
// Renders the recorded first navigations as an HTML table into |output|; IO
// thread only (DCHECKed below). Every (url, time) pair in |first_navigations_|
// is copied into a UrlInfo, collected in a UrlInfoTable, and the table is
// passed to UrlInfo::GetHtmlTable() with the caption
// "Future startups will prefetch DNS records for ".
// NOTE(review): the declaration of the per-iteration |info| object and the
// loop's increment expression are not visible in this listing.
1275 void Predictor::InitialObserver::GetFirstResolutionsHtml(
1276 std::string* output) {
1277 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
1279 UrlInfo::UrlInfoTable resolution_list;
1281 for (FirstNavigations::iterator it(first_navigations_.begin());
1282 it != first_navigations_.end();
1285 info.SetUrl(it->first);
1286 info.set_time(it->second);
1287 resolution_list.push_back(info);
1290 UrlInfo::GetHtmlTable(resolution_list,
1291 "Future startups will prefetch DNS records for ", false, output);
1294 //-----------------------------------------------------------------------------
1296 //-----------------------------------------------------------------------------
// Reduces |url| to the canonical form used as a key by the predictor. Returns
// GURL::EmptyGURL() when |url| has no host, or when it has a scheme other than
// "http" or "https". Otherwise the result carries no path: it is produced
// either by url.GetWithEmptyPath() or by rebuilding the url from
// scheme + "://" + host + optional ":port".
// NOTE(review): the declaration of |scheme|, the conditions that select
// between the two return forms, and the branch taken when |url| has no scheme
// are not visible in this listing.
1299 GURL Predictor::CanonicalizeUrl(const GURL& url) {
1300 if (!url.has_host())
1301 return GURL::EmptyGURL();
1304 if (url.has_scheme()) {
1305 scheme = url.scheme();
1306 if (scheme != "http" && scheme != "https")
1307 return GURL::EmptyGURL();
1309 return url.GetWithEmptyPath();
1314 // If we omit a port, it will default to 80 or 443 as appropriate.
1315 std::string colon_plus_port;
1317 colon_plus_port = ":" + url.port();
1319 return GURL(scheme + "://" + url.host() + colon_plus_port);
// SimplePredictor's InitNetworkPredictor(): intentionally does nothing with
// any of its arguments (see the comment in the body).
1322 void SimplePredictor::InitNetworkPredictor(
1323 PrefService* user_prefs,
1324 PrefService* local_state,
1325 IOThread* io_thread,
1326 net::URLRequestContextGetter* getter,
1327 ProfileIOData* profile_io_data) {
1328 // Empty function for unittests.
// SimplePredictor's ShutdownOnUIThread().
// NOTE(review): the body is not visible in this listing.
1331 void SimplePredictor::ShutdownOnUIThread() {
// SimplePredictor always permits prefetching/prerendering: unlike
// Predictor::CanPrefetchAndPrerender(), no prefs or profile data are consulted.
1335 bool SimplePredictor::CanPrefetchAndPrerender() const { return true; }
// SimplePredictor always permits preresolution/preconnection: unlike
// Predictor::CanPreresolveAndPreconnect(), no prefs or profile data are
// consulted.
1336 bool SimplePredictor::CanPreresolveAndPreconnect() const { return true; }
1338 } // namespace chrome_browser_net