src/chrome/browser/safe_browsing/client_side_detection_service.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4 //
   5 // Helper class which handles communication with the SafeBrowsing backends for
   6 // client-side phishing detection.  This class is used to fetch the client-side
   7 // model and send it to all renderers.  This class is also used to send a ping
   8 // back to Google to verify if a particular site is really phishing or not.
   9 //
  10 // This class is not thread-safe and expects all calls to be made on the UI
  11 // thread.  We also expect that the calling thread runs a message loop.
  12
  13 #ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
  14 #define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
  15
  16 #include <map>
  17 #include <queue>
  18 #include <set>
  19 #include <string>
  20 #include <utility>
  21 #include <vector>
  22
  23 #include "base/basictypes.h"
  24 #include "base/callback_forward.h"
  25 #include "base/gtest_prod_util.h"
  26 #include "base/memory/linked_ptr.h"
  27 #include "base/memory/ref_counted.h"
  28 #include "base/memory/scoped_ptr.h"
  29 #include "base/memory/weak_ptr.h"
  30 #include "base/time/time.h"
  31 #include "content/public/browser/notification_observer.h"
  32 #include "content/public/browser/notification_registrar.h"
  33 #include "net/base/net_util.h"
  34 #include "net/url_request/url_fetcher_delegate.h"
  35 #include "url/gurl.h"
  36
  37 class SafeBrowsingService;  // NOTE(review): unused in the visible header.
  38
  39 namespace base {
  40 class TimeDelta;  // Held through scoped_ptr<base::TimeDelta> model_max_age_.
  41 }
  42
  43 namespace content {
  44 class RenderProcessHost;  // Passed by pointer to SendModelToProcess().
  45 }
  46
  47 namespace net {
     // Only used by pointer, const reference or smart pointer in this header.
  48 class URLFetcher;
  49 class URLRequestContextGetter;
  50 class URLRequestStatus;
     // NOTE(review): presumably mirrors the typedef declared by the net headers so
     // that they need not be included here -- confirm the two stay in sync.
  51 typedef std::vector<std::string> ResponseCookies;
  52 }  // namespace net
  53
  54 namespace safe_browsing {
  55 class ClientMalwareRequest;   // Passed by pointer to the malware report APIs.
  56 class ClientPhishingRequest;  // Passed by pointer to the phishing report APIs.
  57 class ClientPhishingResponse;  // NOTE(review): unused in the visible header.
  58 class ClientSideModel;  // Held through scoped_ptr<ClientSideModel> model_.
  59
  60 class ClientSideDetectionService : public net::URLFetcherDelegate,
  61                                    public content::NotificationObserver {
  62  public:
  63   // void(GURL phishing_url, bool is_phishing).
  64   typedef base::Callback<void(GURL, bool)> ClientReportPhishingRequestCallback;
  65   // void(GURL original_url, GURL malware_url, bool is_malware).
  66   typedef base::Callback<void(GURL, GURL, bool)>
  67       ClientReportMalwareRequestCallback;
  68
  69   virtual ~ClientSideDetectionService();
  70
  71   // Creates a client-side detection service.  The service is initially
  72   // disabled, use SetEnabledAndRefreshState() to start it.  The caller takes
  73   // ownership of the object.  This function may return NULL.
  74   static ClientSideDetectionService* Create(
  75       net::URLRequestContextGetter* request_context_getter);
  76
  77   // Enables or disables the service, and refreshes the state of all renderers.
  78   // This is usually called by the SafeBrowsingService, which tracks whether
  79   // any profile uses these services at all.  Disabling cancels any pending
  80   // requests; existing ClientSideDetectionHosts will have their callbacks
  81   // called with "false" verdicts.  Enabling starts downloading the model after
  82   // a delay.  In all cases, each render process is updated to match the state
  83   // of the SafeBrowsing preference for that profile.
  84   void SetEnabledAndRefreshState(bool enabled);
  85
       // Returns the current value of |enabled_|, i.e. whether the service is
       // running.
  86   bool enabled() const {
  87     return enabled_;
  88   }
  89
  90   // From the net::URLFetcherDelegate interface.
  91   virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
  92
  93   // content::NotificationObserver overrides:
  94   virtual void Observe(int type,
  95                        const content::NotificationSource& source,
  96                        const content::NotificationDetails& details) OVERRIDE;
  97
  98   // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
  99   // The URL scheme of the |url()| in the request should be HTTP.  This method
 100   // takes ownership of the |verdict| as well as the |callback| and calls the
 101   // callback once the result has come back from the server or if an error
 102   // occurs during the fetch.  If the service is disabled or an error occurs
 103   // the phishing verdict will always be false.  The callback is always called
 104   // after SendClientReportPhishingRequest() returns and on the same thread as
 105   // SendClientReportPhishingRequest() was called.  You may set |callback| to
 106   // NULL if you don't care about the server verdict.
 107   virtual void SendClientReportPhishingRequest(
 108       ClientPhishingRequest* verdict,
 109       const ClientReportPhishingRequestCallback& callback);
 110
 111   // Like the method above, but sends a ClientMalwareRequest instead.
 112   virtual void SendClientReportMalwareRequest(
 113       ClientMalwareRequest* verdict,
 114       const ClientReportMalwareRequestCallback& callback);
 115
 116   // Returns true if the given IP address string falls within a private
 117   // (unroutable) network block.  Pages which are hosted on these IP addresses
 118   // are exempt from client-side phishing detection.  This is called by the
 119   // ClientSideDetectionHost prior to sending the renderer a
 120   // SafeBrowsingMsg_StartPhishingDetection IPC.
 121   //
 122   // ip_address should be a dotted IPv4 address, or an unbracketed IPv6
 123   // address.
 124   virtual bool IsPrivateIPAddress(const std::string& ip_address) const;
 125
 126   // Returns true if the given IP address is on the list of known bad IPs.
 127   // ip_address should be a dotted IPv4 address, or an unbracketed IPv6
 128   // address.
 129   virtual bool IsBadIpAddress(const std::string& ip_address) const;
 130
 131   // Returns true and sets is_phishing if url is in the cache and valid.
 132   virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);
 133
 134   // Returns true if the url is in the cache.
 135   virtual bool IsInCache(const GURL& url);
 136
 137   // Returns true if we have sent more than kMaxReportsPerInterval phishing
 138   // reports in the last kReportsIntervalDays days.
 139   virtual bool OverPhishingReportLimit();
 140
 141   // Returns true if we have sent more than kMaxReportsPerInterval malware
 142   // reports in the last kReportsIntervalDays days.
 143   virtual bool OverMalwareReportLimit();
 144
 145  protected:
 146   // Use Create() method to create an instance of this object.
 147   explicit ClientSideDetectionService(
 148       net::URLRequestContextGetter* request_context_getter);
 149
 150   // Enum used to keep stats about why we fail to get the client model.
 151   enum ClientModelStatus {
 152     MODEL_SUCCESS,
 153     MODEL_NOT_CHANGED,
 154     MODEL_FETCH_FAILED,
 155     MODEL_EMPTY,
 156     MODEL_TOO_LARGE,
 157     MODEL_PARSE_ERROR,
 158     MODEL_MISSING_FIELDS,
 159     MODEL_INVALID_VERSION_NUMBER,
 160     MODEL_BAD_HASH_IDS,
 161     MODEL_STATUS_MAX  // Always add new values before this one.
 162   };
 163
 164   // Starts fetching the model from the network or the cache.  This method
 165   // is called periodically to check whether a new client model is available
 166   // for download.
 167   void StartFetchModel();
 168
 169   // Schedules the next fetch of the model.
 170   virtual void ScheduleFetchModel(int64 delay_ms);  // Virtual for testing.
 171
 172   // This method is called when we're done fetching the model either because
 173   // we hit an error somewhere or because we're actually done fetching and
 174   // validating the model.
 175   virtual void EndFetchModel(ClientModelStatus status);  // Virtual for testing.
 176
 177  private:
 178   friend class ClientSideDetectionServiceTest;
 179   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, FetchModelTest);
 180   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, SetBadSubnets);
 181   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
 182                            SetEnabledAndRefreshState);
 183   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
 184   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
 185                            ModelHasValidHashIds);
 186
 187   // CacheState holds all information necessary to respond to a caller without
 188   // actually making a HTTP request.
 189   struct CacheState {
 190     bool is_phishing;
 191     base::Time timestamp;
 192
 193     CacheState(bool phish, base::Time time);
 194   };
 195   typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;
 196
 197   // A tuple of (IP address block, prefix size) representing a private
 198   // IP address range.
 199   typedef std::pair<net::IPAddressNumber, size_t> AddressRange;
 200
 201   // Maps an IPv6 subnet mask to a set of hashed IPv6 subnets.  The IPv6
 202   // subnets are in network order and hashed with sha256.
 203   typedef std::map<std::string /* subnet mask */,
 204                    std::set<std::string /* hashed subnet */> > BadSubnetMap;
 205
       // URLs and limits used by the service.  Only declared here; the values are
       // defined outside this header.
 206   static const char kClientReportMalwareUrl[];
 207   static const char kClientReportPhishingUrl[];
 208   static const char kClientModelUrl[];
 209   static const size_t kMaxModelSizeBytes;
 210   static const int kMaxReportsPerInterval;
 211   static const int kClientModelFetchIntervalMs;
 212   static const int kInitialClientModelFetchDelayMs;
 213   static const int kReportsIntervalDays;
 214   static const int kNegativeCacheIntervalDays;
 215   static const int kPositiveCacheIntervalMinutes;
 216
 217   // Starts sending the request to the client-side detection frontends.
 218   // This method takes ownership of |verdict|; |callback| is a const reference.
 219   void StartClientReportPhishingRequest(
 220       ClientPhishingRequest* verdict,
 221       const ClientReportPhishingRequestCallback& callback);
 222
       // Malware counterpart of StartClientReportPhishingRequest().
 223   void StartClientReportMalwareRequest(
 224       ClientMalwareRequest* verdict,
 225       const ClientReportMalwareRequestCallback& callback);
 226
 227   // Called by OnURLFetchComplete to handle the response from fetching the
 228   // model.
 229   void HandleModelResponse(const net::URLFetcher* source,
 230                            const GURL& url,
 231                            const net::URLRequestStatus& status,
 232                            int response_code,
 233                            const net::ResponseCookies& cookies,
 234                            const std::string& data);
 235
 236   // Called by OnURLFetchComplete to handle the server response from
 237   // sending the client-side phishing request.
 238   void HandlePhishingVerdict(const net::URLFetcher* source,
 239                              const GURL& url,
 240                              const net::URLRequestStatus& status,
 241                              int response_code,
 242                              const net::ResponseCookies& cookies,
 243                              const std::string& data);
 244
 245   // Called by OnURLFetchComplete to handle the server response from
 246   // sending the client-side malware request.
 247   void HandleMalwareVerdict(const net::URLFetcher* source,
 248                             const GURL& url,
 249                             const net::URLRequestStatus& status,
 250                             int response_code,
 251                             const net::ResponseCookies& cookies,
 252                             const std::string& data);
 253
 254   // Invalidate cache results which are no longer useful.
 255   void UpdateCache();
 256
 257   // Number of malware reports we have sent over kReportsIntervalDays days.
 258   int GetMalwareNumReports();
 259
 260   // Number of phishing reports we have sent over kReportsIntervalDays days.
 261   int GetPhishingNumReports();
 262
 263   // Get the number of reports that we have sent over kReportsIntervalDays
 264   // days, and trims off the old elements of |report_times|.
 265   int GetNumReports(std::queue<base::Time>* report_times);
 266
 267   // Initializes the |private_networks_| vector with the network blocks
 268   // that we consider non-public IP addresses.  Returns true on success.
 269   bool InitializePrivateNetworks();
 270
 271   // Send the model to the given renderer.
 272   void SendModelToProcess(content::RenderProcessHost* process);
 273
 274   // Same as above but sends the model to all renderers.
 275   void SendModelToRenderers();
 276
 277   // Reads the bad subnets from the client model and inserts them into
 278   // |bad_subnets| for faster lookups.  This method is static to simplify
 279   // testing.
 280   static void SetBadSubnets(const ClientSideModel& model,
 281                             BadSubnetMap* bad_subnets);
 282
 283
 284   // Returns true iff all the hash id's in the client-side model point to
 285   // valid hashes in the model.
 286   static bool ModelHasValidHashIds(const ClientSideModel& model);
 287
 288   // Returns the URL that will be used for phishing requests.
       // NOTE(review): the base URL comes in as |report_url| and both
       // kClientReportPhishingUrl and kClientReportMalwareUrl exist, so this is
       // presumably shared with malware requests -- confirm and reword.
 289   static GURL GetClientReportUrl(const std::string& report_url);
 290
 291   // Whether the service is running or not.  When the service is not running,
 292   // it won't download the model nor report detected phishing URLs.
 293   bool enabled_;
 294
       // Client-side model state.  Presumably: the serialized model, its parsed
       // form, how long it may be used before refetching, and the fetcher for an
       // in-flight model download -- TODO confirm against the implementation.
 295   std::string model_str_;
 296   scoped_ptr<ClientSideModel> model_;
 297   scoped_ptr<base::TimeDelta> model_max_age_;
 298   scoped_ptr<net::URLFetcher> model_fetcher_;
 299
 300   // Map of client report phishing request to the corresponding callback that
 301   // has to be invoked when the request is done.
       // NOTE(review): keys and values are raw pointers; who deletes them is not
       // visible in this header -- check the implementation before changing.
 302   struct ClientReportInfo;
 303   std::map<const net::URLFetcher*, ClientReportInfo*>
 304       client_phishing_reports_;
 305   // Map of client malware ip request to the corresponding callback that
 306   // has to be invoked when the request is done.
 307   struct ClientMalwareReportInfo;
 308   std::map<const net::URLFetcher*, ClientMalwareReportInfo*>
 309       client_malware_reports_;
 310
 311   // Cache of completed requests. Used to satisfy requests for the same urls
 312   // as long as the next request falls within our caching window (which is
 313   // determined by kNegativeCacheIntervalDays and
 314   // kPositiveCacheIntervalMinutes). The size of this cache was documented as
 315   // kMaxReportsPerDay * ceil(InDays(max(negative, positive cache interval))).
       // NOTE(review): kMaxReportsPerDay is not declared in this class (the
       // closest constant is kMaxReportsPerInterval) -- confirm the real bound.
 316   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
 317   PhishingCache cache_;
 318
 319   // Timestamp of when we sent a phishing request. Used to limit the number
 320   // of phishing requests that we send in a day.
 321   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
 322   std::queue<base::Time> phishing_report_times_;
 323
 324   // Timestamp of when we sent a malware request. Used to limit the number
 325   // of malware requests that we send in a day.
 326   std::queue<base::Time> malware_report_times_;
 327
 328   // Used to asynchronously call the callbacks for
 329   // SendClientReportPhishingRequest.
       // NOTE(review): C++ destroys members in reverse declaration order, so the
       // members declared below are destroyed before this factory is.  The usual
       // guidance is to declare a WeakPtrFactory last; moving it also changes
       // initialization order, so the constructor must be updated in step.
 330   base::WeakPtrFactory<ClientSideDetectionService> weak_factory_;
 331
 332   // The context we use to issue network requests.
 333   scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
 334
 335   // The network blocks that we consider private IP address ranges.
 336   std::vector<AddressRange> private_networks_;
 337
 338   // Map of bad subnets which are copied from the client model and put into
 339   // this map to speed up lookups.
 340   BadSubnetMap bad_subnets_;
 341
       // Presumably holds the registrations for the notifications delivered to
       // Observe() -- TODO confirm which notification types are registered.
 342   content::NotificationRegistrar registrar_;
 343
 344   DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService);
 345 };
 346 }  // namespace safe_browsing
 347
 348 #endif  // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_