src/chrome/browser/net/predictor.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 // A Predictor object is instantiated once in the browser process, and manages
   6 // both preresolution of hostnames, as well as TCP/IP preconnection to expected
   7 // subresources.
   8 // Most hostname lists are provided by the renderer processes, and include URLs
   9 // that *might* be used in the near future by the browsing user.  One goal of
  10 // this class is to cause the underlying DNS structure to lookup a hostname
  11 // before it is really needed, and hence reduce latency in the standard lookup
  12 // paths.
  13 // Subresource relationships are usually acquired from the referrer field in a
  14 // navigation.  A subresource URL may be associated with a referrer URL.  Later
  15 // navigations may, if the likelihood of needing the subresource is high enough,
  16 // cause this module to speculatively create a TCP/IP connection. If there is
  17 // only a low likelihood, then a DNS pre-resolution operation may be performed.
  18
  19 #ifndef CHROME_BROWSER_NET_PREDICTOR_H_
  20 #define CHROME_BROWSER_NET_PREDICTOR_H_
  21
  22 #include <map>
  23 #include <queue>
  24 #include <set>
  25 #include <string>
  26 #include <vector>
  27
  28 #include "base/gtest_prod_util.h"
  29 #include "base/memory/scoped_ptr.h"
  30 #include "base/memory/weak_ptr.h"
  31 #include "chrome/browser/net/referrer.h"
  32 #include "chrome/browser/net/timed_cache.h"
  33 #include "chrome/browser/net/url_info.h"
  34 #include "chrome/common/net/predictor_common.h"
  35 #include "net/base/host_port_pair.h"
  36
  37 class IOThread;
  38 class PrefService;
  39 class Profile;
  40
  41 namespace base {
  42 class ListValue;
  43 class WaitableEvent;
  44 }
  45
  46 namespace net {
  47 class HostResolver;
  48 class URLRequestContextGetter;
  49 }
  50
  51 namespace user_prefs {
  52 class PrefRegistrySyncable;
  53 }
  54
  55 namespace chrome_browser_net {
  56
  57 typedef chrome_common_net::UrlList UrlList;
  58 typedef chrome_common_net::NameList NameList;
  59 typedef std::map<GURL, UrlInfo> Results;
  60
  61 // Predictor is constructed during Profile construction (on the UI thread),
  62 // but it is destroyed on the IO thread when ProfileIOData goes away. All of
  63 // its core state and functionality happens on the IO thread. The only UI
  64 // methods are initialization / shutdown related (including preconnect
  65 // initialization), or convenience methods that internally forward calls to
  66 // the IO thread.
  67 class Predictor {
  68  public:
  69   // A version number for prefs that are saved. This should be incremented when
  70   // we change the format so that we discard old data.
  71   static const int kPredictorReferrerVersion;
  72
  73   // Given that the underlying Chromium resolver defaults to a total maximum of
  74   // 8 paralell resolutions, we will avoid any chance of starving navigational
  75   // resolutions by limiting the number of paralell speculative resolutions.
  76   // This is used in the field trials and testing.
  77   // TODO(jar): Move this limitation into the resolver.
  78   static const size_t kMaxSpeculativeParallelResolves;
  79
  80   // To control the congestion avoidance system, we need an estimate of how
  81   // many speculative requests may arrive at once.  Since we currently only
  82   // keep 8 subresource names for each frame, we'll use that as our basis.
  83   // Note that when scanning search results lists, we might actually get 10 at
  84   // a time, and wikipedia can often supply (during a page scan) upwards of 50.
  85   // In those odd cases, we may discard some of the later speculative requests
  86   // mistakenly assuming that the resolutions took too long.
  87   static const int kTypicalSpeculativeGroupSize;
  88
  89   // The next constant specifies an amount of queueing delay that is
  90   // "too large," and indicative of problems with resolutions (perhaps due to
  91   // an overloaded router, or such).  When we exceed this delay, congestion
  92   // avoidance will kick in and all speculations in the queue will be discarded.
  93   static const int kMaxSpeculativeResolveQueueDelayMs;
  94
  95   // We don't bother learning to preconnect via a GET if the original URL
  96   // navigation was so long ago, that a preconnection would have been dropped
  97   // anyway.  We believe most servers will drop the connection in 10 seconds, so
  98   // we currently estimate this time-till-drop at 10 seconds.
  99   // TODO(jar): We should do a persistent field trial to validate/optimize this.
 100   static const int kMaxUnusedSocketLifetimeSecondsWithoutAGet;
 101
 102   // |max_concurrent| specifies how many concurrent (parallel) prefetches will
 103   // be performed. Host lookups will be issued through |host_resolver|.
 104   explicit Predictor(bool preconnect_enabled);
 105
 106   virtual ~Predictor();
 107
 108   // This function is used to create a predictor. For testing, we can create
 109   // a version which does a simpler shutdown.
 110   static Predictor* CreatePredictor(bool preconnect_enabled,
 111                                     bool simple_shutdown);
 112
 113   static void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry);
 114
 115   // ------------- Start UI thread methods.
 116
 117   virtual void InitNetworkPredictor(PrefService* user_prefs,
 118                                     PrefService* local_state,
 119                                     IOThread* io_thread,
 120                                     net::URLRequestContextGetter* getter);
 121
 122   // The Omnibox has proposed a given url to the user, and if it is a search
 123   // URL, then it also indicates that this is preconnectable (i.e., we could
 124   // preconnect to the search server).
 125   void AnticipateOmniboxUrl(const GURL& url, bool preconnectable);
 126
 127   // Preconnect a URL and all of its subresource domains.
 128   void PreconnectUrlAndSubresources(const GURL& url,
 129                                     const GURL& first_party_for_cookies);
 130
 131   static UrlList GetPredictedUrlListAtStartup(PrefService* user_prefs,
 132                                               PrefService* local_state);
 133
 134   static void set_max_queueing_delay(int max_queueing_delay_ms);
 135
 136   static void set_max_parallel_resolves(size_t max_parallel_resolves);
 137
 138   virtual void ShutdownOnUIThread();
 139
 140   // ------------- End UI thread methods.
 141
 142   // ------------- Start IO thread methods.
 143
 144   // Cancel pending requests and prevent new ones from being made.
 145   void Shutdown();
 146
 147   // In some circumstances, for privacy reasons, all results should be
 148   // discarded.  This method gracefully handles that activity.
 149   // Destroy all our internal state, which shows what names we've looked up, and
 150   // how long each has taken, etc. etc.  We also destroy records of suggesses
 151   // (cache hits etc.).
 152   void DiscardAllResults();
 153
 154   // Add hostname(s) to the queue for processing.
 155   void ResolveList(const UrlList& urls,
 156                    UrlInfo::ResolutionMotivation motivation);
 157
 158   void Resolve(const GURL& url, UrlInfo::ResolutionMotivation motivation);
 159
 160   // Record details of a navigation so that we can preresolve the host name
 161   // ahead of time the next time the users navigates to the indicated host.
 162   // Should only be called when urls are distinct, and they should already be
 163   // canonicalized to not have a path.
 164   void LearnFromNavigation(const GURL& referring_url, const GURL& target_url);
 165
 166   // When displaying info in about:dns, the following API is called.
 167   static void PredictorGetHtmlInfo(Predictor* predictor, std::string* output);
 168
 169   // Dump HTML table containing list of referrers for about:dns.
 170   void GetHtmlReferrerLists(std::string* output);
 171
 172   // Dump the list of currently known referrer domains and related prefetchable
 173   // domains for about:dns.
 174   void GetHtmlInfo(std::string* output);
 175
 176   // Discards any referrer for which all the suggested host names are currently
 177   // annotated with negligible expected-use.  Scales down (diminishes) the
 178   // expected-use of those that remain, so that their use will go down by a
 179   // factor each time we trim (moving the referrer closer to being discarded in
 180   // a future call).
 181   // The task is performed synchronously and completes before returing.
 182   void TrimReferrersNow();
 183
 184   // Construct a ListValue object that contains all the data in the referrers_
 185   // so that it can be persisted in a pref.
 186   void SerializeReferrers(base::ListValue* referral_list);
 187
 188   // Process a ListValue that contains all the data from a previous reference
 189   // list, as constructed by SerializeReferrers(), and add all the identified
 190   // values into the current referrer list.
 191   void DeserializeReferrers(const base::ListValue& referral_list);
 192
 193   void DeserializeReferrersThenDelete(base::ListValue* referral_list);
 194
 195   void DiscardInitialNavigationHistory();
 196
 197   void FinalizeInitializationOnIOThread(
 198       const std::vector<GURL>& urls_to_prefetch,
 199       base::ListValue* referral_list,
 200       IOThread* io_thread,
 201       bool predictor_enabled);
 202
 203   // During startup, we learn what the first N urls visited are, and then
 204   // resolve the associated hosts ASAP during our next startup.
 205   void LearnAboutInitialNavigation(const GURL& url);
 206
 207   // Renderer bundles up list and sends to this browser API via IPC.
 208   // TODO(jar): Use UrlList instead to include port and scheme.
 209   void DnsPrefetchList(const NameList& hostnames);
 210
 211   // May be called from either the IO or UI thread and will PostTask
 212   // to the IO thread if necessary.
 213   void DnsPrefetchMotivatedList(const UrlList& urls,
 214                                 UrlInfo::ResolutionMotivation motivation);
 215
 216   // May be called from either the IO or UI thread and will PostTask
 217   // to the IO thread if necessary.
 218   void SaveStateForNextStartupAndTrim(PrefService* prefs);
 219
 220   void SaveDnsPrefetchStateForNextStartupAndTrim(
 221       base::ListValue* startup_list,
 222       base::ListValue* referral_list,
 223       base::WaitableEvent* completion);
 224
 225   // May be called from either the IO or UI thread and will PostTask
 226   // to the IO thread if necessary.
 227   void EnablePredictor(bool enable);
 228
 229   void EnablePredictorOnIOThread(bool enable);
 230
 231   // May be called from either the IO or UI thread and will PostTask
 232   // to the IO thread if necessary.
 233   void PreconnectUrl(const GURL& url, const GURL& first_party_for_cookies,
 234                      UrlInfo::ResolutionMotivation motivation, int count);
 235
 236   void PreconnectUrlOnIOThread(const GURL& url,
 237                                const GURL& first_party_for_cookies,
 238                                UrlInfo::ResolutionMotivation motivation,
 239                                int count);
 240
 241   void RecordPreconnectTrigger(const GURL& url);
 242
 243   void RecordPreconnectNavigationStat(const std::vector<GURL>& url_chain,
 244                                       bool is_subresource);
 245
 246   void RecordLinkNavigation(const GURL& url);
 247
 248   // ------------- End IO thread methods.
 249
 250   // The following methods may be called on either the IO or UI threads.
 251
 252   // Instigate pre-connection to any URLs, or pre-resolution of related host,
 253   // that we predict will be needed after this navigation (typically
 254   // more-embedded resources on a page).  This method will actually post a task
 255   // to do the actual work, so as not to jump ahead of the frame navigation that
 256   // instigated this activity.
 257   void PredictFrameSubresources(const GURL& url,
 258                                 const GURL& first_party_for_cookies);
 259
 260   // Put URL in canonical form, including a scheme, host, and port.
 261   // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url
 262   // cannot be otherwise canonicalized.
 263   static GURL CanonicalizeUrl(const GURL& url);
 264
 265   // Used for testing.
 266   void SetHostResolver(net::HostResolver* host_resolver) {
 267     host_resolver_ = host_resolver;
 268   }
 269   // Used for testing.
 270   size_t max_concurrent_dns_lookups() const {
 271     return max_concurrent_dns_lookups_;
 272   }
 273   // Used for testing.
 274   void SetShutdown(bool shutdown) {
 275     shutdown_ = shutdown;
 276   }
 277
 278   // Flag setting to use preconnection instead of just DNS pre-fetching.
 279   bool preconnect_enabled() const {
 280     return preconnect_enabled_;
 281   }
 282
 283   // Flag setting for whether we are prefetching dns lookups.
 284   bool predictor_enabled() const {
 285     return predictor_enabled_;
 286   }
 287
 288
 289  private:
 290   FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest);
 291   FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest);
 292   FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest);
 293   FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest);
 294   FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest);
 295   FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest);
 296   FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest);
 297   FRIEND_TEST_ALL_PREFIXES(PredictorTest, ReferrerSerializationTrimTest);
 298   friend class WaitForResolutionHelper;  // For testing.
 299
 300   class LookupRequest;
 301
 302   // A simple priority queue for handling host names.
 303   // Some names that are queued up have |motivation| that requires very rapid
 304   // handling.  For example, a sub-resource name lookup MUST be done before the
 305   // actual sub-resource is fetched.  In contrast, a name that was speculatively
 306   // noted in a page has to be resolved before the user "gets around to"
 307   // clicking on a link.  By tagging (with a motivation) each push we make into
 308   // this FIFO queue, the queue can re-order the more important names to service
 309   // them sooner (relative to some low priority background resolutions).
 310   class HostNameQueue {
 311    public:
 312     HostNameQueue();
 313     ~HostNameQueue();
 314     void Push(const GURL& url,
 315               UrlInfo::ResolutionMotivation motivation);
 316     bool IsEmpty() const;
 317     GURL Pop();
 318
 319    private:
 320     // The names in the queue that should be serviced (popped) ASAP.
 321     std::queue<GURL> rush_queue_;
 322     // The names in the queue that should only be serviced when rush_queue is
 323     // empty.
 324     std::queue<GURL> background_queue_;
 325
 326   DISALLOW_COPY_AND_ASSIGN(HostNameQueue);
 327   };
 328
 329   // The InitialObserver monitors navigations made by the network stack. This
 330   // is only used to identify startup time resolutions (for re-resolution
 331   // during our next process startup).
 332   // TODO(jar): Consider preconnecting at startup, which may be faster than
 333   // waiting for render process to start and request a connection.
 334   class InitialObserver {
 335    public:
 336     InitialObserver();
 337     ~InitialObserver();
 338     // Recording of when we observed each navigation.
 339     typedef std::map<GURL, base::TimeTicks> FirstNavigations;
 340
 341     // Potentially add a new URL to our startup list.
 342     void Append(const GURL& url, Predictor* predictor);
 343
 344     // Get an HTML version of our current planned first_navigations_.
 345     void GetFirstResolutionsHtml(std::string* output);
 346
 347     // Persist the current first_navigations_ for storage in a list.
 348     void GetInitialDnsResolutionList(base::ListValue* startup_list);
 349
 350     // Discards all initial loading history.
 351     void DiscardInitialNavigationHistory() { first_navigations_.clear(); }
 352
 353    private:
 354     // List of the first N URL resolutions observed in this run.
 355     FirstNavigations first_navigations_;
 356
 357     // The number of URLs we'll save for pre-resolving at next startup.
 358     static const size_t kStartupResolutionCount = 10;
 359   };
 360
 361   // A map that is keyed with the host/port that we've learned were the cause
 362   // of loading additional URLs.  The list of additional targets is held
 363   // in a Referrer instance, which is a value in this map.
 364   typedef std::map<GURL, Referrer> Referrers;
 365
 366   // Depending on the expected_subresource_use_, we may either make a TCP/IP
 367   // preconnection, or merely pre-resolve the hostname via DNS (or even do
 368   // nothing).  The following are the threasholds for taking those actions.
 369   static const double kPreconnectWorthyExpectedValue;
 370   static const double kDNSPreresolutionWorthyExpectedValue;
 371   // Referred hosts with a subresource_use_rate_ that are less than the
 372   // following threshold will be discarded when we Trim() the list.
 373   static const double kDiscardableExpectedValue;
 374   // During trimming operation to discard hosts for which we don't have likely
 375   // subresources, we multiply the expected_subresource_use_ value by the
 376   // following ratio until that value is less than kDiscardableExpectedValue.
 377   // This number should always be less than 1, an more than 0.
 378   static const double kReferrerTrimRatio;
 379
 380   // Interval between periodic trimming of our whole referrer list.
 381   // We only do a major trimming about once an hour, and then only when the user
 382   // is actively browsing.
 383   static const int64 kDurationBetweenTrimmingsHours;
 384   // Interval between incremental trimmings (to avoid inducing Jank).
 385   static const int64 kDurationBetweenTrimmingIncrementsSeconds;
 386   // Number of referring URLs processed in an incremental trimming.
 387   static const size_t kUrlsTrimmedPerIncrement;
 388
 389   // Only for testing. Returns true if hostname has been successfully resolved
 390   // (name found).
 391   bool WasFound(const GURL& url) const {
 392     Results::const_iterator it(results_.find(url));
 393     return (it != results_.end()) &&
 394             it->second.was_found();
 395   }
 396
 397   // Only for testing. Return how long was the resolution
 398   // or UrlInfo::NullDuration() if it hasn't been resolved yet.
 399   base::TimeDelta GetResolutionDuration(const GURL& url) {
 400     if (results_.find(url) == results_.end())
 401       return UrlInfo::NullDuration();
 402     return results_[url].resolve_duration();
 403   }
 404
 405   // Only for testing;
 406   size_t peak_pending_lookups() const { return peak_pending_lookups_; }
 407
 408   // ------------- Start IO thread methods.
 409
 410   // Perform actual resolution or preconnection to subresources now.  This is
 411   // an internal worker method that is reached via a post task from
 412   // PredictFrameSubresources().
 413   void PrepareFrameSubresources(const GURL& url,
 414                                 const GURL& first_party_for_cookies);
 415
 416   // Access method for use by async lookup request to pass resolution result.
 417   void OnLookupFinished(LookupRequest* request, const GURL& url, bool found);
 418
 419   // Underlying method for both async and synchronous lookup to update state.
 420   void LookupFinished(LookupRequest* request,
 421                       const GURL& url, bool found);
 422
 423   // Queue hostname for resolution.  If queueing was done, return the pointer
 424   // to the queued instance, otherwise return NULL.
 425   UrlInfo* AppendToResolutionQueue(const GURL& url,
 426       UrlInfo::ResolutionMotivation motivation);
 427
 428   // Check to see if too much queuing delay has been noted for the given info,
 429   // which indicates that there is "congestion" or growing delay in handling the
 430   // resolution of names.  Rather than letting this congestion potentially grow
 431   // without bounds, we abandon our queued efforts at pre-resolutions in such a
 432   // case.
 433   // To do this, we will recycle |info|, as well as all queued items, back to
 434   // the state they had before they were queued up.  We can't do anything about
 435   // the resolutions we've already sent off for processing on another thread, so
 436   // we just let them complete.  On a slow system, subject to congestion, this
 437   // will greatly reduce the number of resolutions done, but it will assure that
 438   // any resolutions that are done, are in a timely and hence potentially
 439   // helpful manner.
 440   bool CongestionControlPerformed(UrlInfo* info);
 441
 442   // Take lookup requests from work_queue_ and tell HostResolver to look them up
 443   // asynchronously, provided we don't exceed concurrent resolution limit.
 444   void StartSomeQueuedResolutions();
 445
 446   // Performs trimming similar to TrimReferrersNow(), except it does it as a
 447   // series of short tasks by posting continuations again an again until done.
 448   void TrimReferrers();
 449
 450   // Loads urls_being_trimmed_ from keys of current referrers_.
 451   void LoadUrlsForTrimming();
 452
 453   // Posts a task to do additional incremental trimming of referrers_.
 454   void PostIncrementalTrimTask();
 455
 456   // Calls Trim() on some or all of urls_being_trimmed_.
 457   // If it does not process all the URLs in that vector, it posts a task to
 458   // continue with them shortly (i.e., it yeilds and continues).
 459   void IncrementalTrimReferrers(bool trim_all_now);
 460
 461   // ------------- End IO thread methods.
 462
 463   scoped_ptr<InitialObserver> initial_observer_;
 464
 465   // Reference to URLRequestContextGetter from the Profile which owns the
 466   // predictor. Used by Preconnect.
 467   scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
 468
 469   // Status of speculative DNS resolution and speculative TCP/IP connection
 470   // feature.
 471   bool predictor_enabled_;
 472
 473   // work_queue_ holds a list of names we need to look up.
 474   HostNameQueue work_queue_;
 475
 476   // results_ contains information for existing/prior prefetches.
 477   Results results_;
 478
 479   std::set<LookupRequest*> pending_lookups_;
 480
 481   // For testing, to verify that we don't exceed the limit.
 482   size_t peak_pending_lookups_;
 483
 484   // When true, we don't make new lookup requests.
 485   bool shutdown_;
 486
 487   // The number of concurrent speculative lookups currently allowed to be sent
 488   // to the resolver.  Any additional lookups will be queued to avoid exceeding
 489   // this value.  The queue is a priority queue that will accelerate
 490   // sub-resource speculation, and retard resolutions suggested by page scans.
 491   const size_t max_concurrent_dns_lookups_;
 492
 493   // The maximum queueing delay that is acceptable before we enter congestion
 494   // reduction mode, and discard all queued (but not yet assigned) resolutions.
 495   const base::TimeDelta max_dns_queue_delay_;
 496
 497   // The host resolver we warm DNS entries for.
 498   net::HostResolver* host_resolver_;
 499
 500   // Are we currently using preconnection, rather than just DNS resolution, for
 501   // subresources and omni-box search URLs.
 502   bool preconnect_enabled_;
 503
 504   // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl().
 505   std::string last_omnibox_host_;
 506
 507   // The time when the last preresolve was done for last_omnibox_host_.
 508   base::TimeTicks last_omnibox_preresolve_;
 509
 510   // The number of consecutive requests to AnticipateOmniboxUrl() that suggested
 511   // preconnecting (because it was to a search service).
 512   int consecutive_omnibox_preconnect_count_;
 513
 514   // The time when the last preconnection was requested to a search service.
 515   base::TimeTicks last_omnibox_preconnect_;
 516
 517   class PreconnectUsage;
 518   scoped_ptr<PreconnectUsage> preconnect_usage_;
 519
 520   // For each URL that we might navigate to (that we've "learned about")
 521   // we have a Referrer list. Each Referrer list has all hostnames we might
 522   // need to pre-resolve or pre-connect to when there is a navigation to the
 523   // orginial hostname.
 524   Referrers referrers_;
 525
 526   // List of URLs in referrers_ currently being trimmed (scaled down to
 527   // eventually be aged out of use).
 528   std::vector<GURL> urls_being_trimmed_;
 529
 530   // A time after which we need to do more trimming of referrers.
 531   base::TimeTicks next_trim_time_;
 532
 533   scoped_ptr<base::WeakPtrFactory<Predictor> > weak_factory_;
 534
 535   DISALLOW_COPY_AND_ASSIGN(Predictor);
 536 };
 537
 538 // This version of the predictor is used for testing.
 539 class SimplePredictor : public Predictor {
 540  public:
 541   explicit SimplePredictor(bool preconnect_enabled)
 542       : Predictor(preconnect_enabled) {}
 543   virtual ~SimplePredictor() {}
 544   virtual void InitNetworkPredictor(
 545       PrefService* user_prefs,
 546       PrefService* local_state,
 547       IOThread* io_thread,
 548       net::URLRequestContextGetter* getter) OVERRIDE;
 549   virtual void ShutdownOnUIThread() OVERRIDE;
 550 };
 551
 552 }  // namespace chrome_browser_net
 553
 554 #endif  // CHROME_BROWSER_NET_PREDICTOR_H_