1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // BrowserFeatureExtractor computes various browser features for client-side
6 // phishing detection. For now it does a bunch of lookups in the history
7 // service to see whether a particular URL has been visited before by the
10 #ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
11 #define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
19 #include "base/basictypes.h"
20 #include "base/callback.h"
21 #include "base/containers/hash_tables.h"
22 #include "base/memory/scoped_ptr.h"
23 #include "base/sequenced_task_runner_helpers.h"
24 #include "base/time/time.h"
25 #include "chrome/browser/common/cancelable_request.h"
26 #include "chrome/browser/history/history_types.h"
27 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
28 #include "chrome/browser/safe_browsing/ui_manager.h"
37 namespace safe_browsing {
38 class ClientMalwareRequest;
39 class ClientPhishingRequest;
40 class ClientSideDetectionService;
42 typedef std::map<std::string, std::set<std::string> > IPUrlMap;
45 // List of IPv4 and IPv6 addresses from which content was requested
46 // together with the hosts on it, while browsing to the |url|.
49 // If a SafeBrowsing interstitial was shown for the current URL
50 // this will contain the UnsafeResource struct for that URL.
51 scoped_ptr<SafeBrowsingUIManager::UnsafeResource> unsafe_resource;
53 // List of redirects that lead to the first page on the current host and
54 // the current url respectively. These may be the same if the current url
55 // is the first page on its host.
56 std::vector<GURL> host_redirects;
57 std::vector<GURL> url_redirects;
59 // The HTTP status code from this navigation.
66 // All methods of this class must be called on the UI thread (including
68 class BrowserFeatureExtractor {
70 // Called when feature extraction is done. The first argument will be
71 // true iff feature extraction succeeded. The second argument is the
72 // phishing request which was modified by the feature extractor. The
73 // DoneCallback takes ownership of the request object.
74 typedef base::Callback<void(bool, ClientPhishingRequest*)> DoneCallback;
75 typedef base::Callback<void(bool, ClientMalwareRequest*)> MalwareDoneCallback;
77 // The caller keeps ownership of the tab and service objects and is
78 // responsible for ensuring that they stay valid for the entire
79 // lifetime of this object.
80 BrowserFeatureExtractor(content::WebContents* tab,
81 ClientSideDetectionService* service);
83 // The destructor will cancel any pending requests.
84 virtual ~BrowserFeatureExtractor();
86 // Begins extraction of the browser features. We take ownership
87 // of the request object until |callback| is called (see DoneCallback above)
88 // and will write the extracted features to the feature map. Once the
89 // feature extraction is complete, |callback| is run on the UI thread. We
90 // take ownership of the |callback| object. |info| may not be valid after
91 // ExtractFeatures returns. This method must run on the UI thread.
92 virtual void ExtractFeatures(const BrowseInfo* info,
93 ClientPhishingRequest* request,
94 const DoneCallback& callback);
96 // Extract the malware related features. The request object is owned by the
98 virtual void ExtractMalwareFeatures(const BrowseInfo* info,
99 ClientMalwareRequest* request);
102 friend class base::DeleteHelper<BrowserFeatureExtractor>;
103 typedef std::pair<ClientPhishingRequest*, DoneCallback> ExtractionData;
104 typedef std::map<CancelableRequestProvider::Handle,
105 ExtractionData> PendingQueriesMap;
107 // Synchronous browser feature extraction.
108 void ExtractBrowseInfoFeatures(const BrowseInfo& info,
109 ClientPhishingRequest* request);
111 // Actually starts feature extraction (does the real work).
112 void StartExtractFeatures(ClientPhishingRequest* request,
113 const DoneCallback& callback);
115 // HistoryService callback which is called when we're done querying URL visits
117 void QueryUrlHistoryDone(CancelableRequestProvider::Handle handle,
119 const history::URLRow* row,
120 history::VisitVector* visits);
122 // HistoryService callback which is called when we're done querying HTTP host
123 // visits in the history.
124 void QueryHttpHostVisitsDone(CancelableRequestProvider::Handle handle,
127 base::Time first_visit);
129 // HistoryService callback which is called when we're done querying HTTPS host
130 // visits in the history.
131 void QueryHttpsHostVisitsDone(CancelableRequestProvider::Handle handle,
134 base::Time first_visit);
136 // Helper function which sets the host history features given the
137 // number of host visits and the time of the first host visit. Set
138 // |is_http_query| to true if the URL scheme is HTTP and to false if
139 // the scheme is HTTPS.
140 void SetHostVisitsFeatures(int num_visits,
141 base::Time first_visit,
143 ClientPhishingRequest* request);
145 // Helper function which stores the request and callback while the history
146 // query is being processed.
147 void StorePendingQuery(CancelableRequestProvider::Handle handle,
148 ClientPhishingRequest* request,
149 const DoneCallback& callback);
151 // Helper function which is the counterpart of StorePendingQuery. If there
152 // is a pending query for the given handle it will return true and set both
153 // the request and callback pointers. Otherwise, it will return false.
154 bool GetPendingQuery(CancelableRequestProvider::Handle handle,
155 ClientPhishingRequest** request,
156 DoneCallback* callback);
158 // Helper function which gets the history service if possible. Returns true
159 // if |history| was set, and false otherwise.
160 bool GetHistoryService(HistoryService** history);
162 content::WebContents* tab_;
163 ClientSideDetectionService* service_;
164 CancelableRequestConsumer request_consumer_;
165 base::WeakPtrFactory<BrowserFeatureExtractor> weak_factory_;
167 // Set of pending extractions (i.e. extractions for which ExtractFeatures was
168 // called but not StartExtractFeatures).
169 std::map<ClientPhishingRequest*, DoneCallback> pending_extractions_;
171 // Set of pending queries (i.e., where history->Query...() was called but
172 // the history callback hasn't been invoked yet).
173 PendingQueriesMap pending_queries_;
175 // Max number of malware IPs that can be sent in one malware request.
176 static const int kMaxMalwareIPPerRequest;
178 DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor);
181 } // namespace safe_browsing
182 #endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_