Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / extensions / activity_log / uma_policy.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/extensions/activity_log/uma_policy.h"
6
7 #include "base/metrics/histogram.h"
8 #include "base/strings/stringprintf.h"
9 #include "chrome/browser/browser_process.h"
10 #include "chrome/browser/extensions/active_script_controller.h"
11 #include "chrome/browser/extensions/activity_log/activity_action_constants.h"
12 #include "chrome/browser/extensions/activity_log/ad_network_database.h"
13 #include "chrome/browser/sessions/session_tab_helper.h"
14 #include "chrome/browser/ui/browser.h"
15 #include "chrome/browser/ui/browser_list.h"
16 #include "chrome/browser/ui/tabs/tab_strip_model.h"
17 #include "chrome/common/url_constants.h"
18 #include "content/public/browser/web_contents.h"
19 #include "content/public/common/url_constants.h"
20 #include "extensions/browser/extension_registry.h"
21 #include "extensions/common/dom_action_types.h"
22 #include "extensions/common/extension.h"
23 #include "extensions/common/manifest.h"
24
25 namespace extensions {
26
27 namespace {
28
29 // For convenience.
30 const int kNoStatus           = UmaPolicy::NONE;
31 const int kContentScript      = 1 << UmaPolicy::CONTENT_SCRIPT;
32 const int kReadDom            = 1 << UmaPolicy::READ_DOM;
33 const int kModifiedDom        = 1 << UmaPolicy::MODIFIED_DOM;
34 const int kDomMethod          = 1 << UmaPolicy::DOM_METHOD;
35 const int kDocumentWrite      = 1 << UmaPolicy::DOCUMENT_WRITE;
36 const int kInnerHtml          = 1 << UmaPolicy::INNER_HTML;
37 const int kCreatedScript      = 1 << UmaPolicy::CREATED_SCRIPT;
38 const int kCreatedIframe      = 1 << UmaPolicy::CREATED_IFRAME;
39 const int kCreatedDiv         = 1 << UmaPolicy::CREATED_DIV;
40 const int kCreatedLink        = 1 << UmaPolicy::CREATED_LINK;
41 const int kCreatedInput       = 1 << UmaPolicy::CREATED_INPUT;
42 const int kCreatedEmbed       = 1 << UmaPolicy::CREATED_EMBED;
43 const int kCreatedObject      = 1 << UmaPolicy::CREATED_OBJECT;
44 const int kAdInjected         = 1 << UmaPolicy::AD_INJECTED;
45 const int kAdRemoved          = 1 << UmaPolicy::AD_REMOVED;
46 const int kAdReplaced         = 1 << UmaPolicy::AD_REPLACED;
47 const int kAdLikelyInjected   = 1 << UmaPolicy::AD_LIKELY_INJECTED;
48 const int kAdLikelyReplaced   = 1 << UmaPolicy::AD_LIKELY_REPLACED;
49
50 // A mask of all the ad injection flags.
51 const int kAnyAdActivity = kAdInjected |
52                            kAdRemoved |
53                            kAdReplaced |
54                            kAdLikelyInjected |
55                            kAdLikelyReplaced;
56
57 }  // namespace
58
59 // Class constants, also used in testing. --------------------------------------
60
61 const char UmaPolicy::kNumberOfTabs[]       = "num_tabs";
62 const size_t UmaPolicy::kMaxTabsTracked     = 50;
63
64 // Setup and shutdown. ---------------------------------------------------------
65
66 UmaPolicy::UmaPolicy(Profile* profile)
67     : ActivityLogPolicy(profile), profile_(profile) {
68   DCHECK(!profile->IsOffTheRecord());
69   BrowserList::AddObserver(this);
70 }
71
72 UmaPolicy::~UmaPolicy() {
73   BrowserList::RemoveObserver(this);
74 }
75
76 // Unlike the other policies, UmaPolicy can commit suicide directly because it
77 // doesn't have a dependency on a database.
78 void UmaPolicy::Close() {
79   delete this;
80 }
81
82 // Process actions. ------------------------------------------------------------
83
84 void UmaPolicy::ProcessAction(scoped_refptr<Action> action) {
85   if (!action->page_url().is_valid() && !action->arg_url().is_valid())
86     return;
87   if (action->page_incognito() || action->arg_incognito())
88     return;
89   std::string url;
90   int status = MatchActionToStatus(action);
91   if (action->page_url().is_valid()) {
92     url = CleanURL(action->page_url());
93   } else if (status & kContentScript) {
94     // This is for the tabs.executeScript case.
95     url = CleanURL(action->arg_url());
96   }
97   if (url.empty())
98     return;
99
100   SiteMap::iterator site_lookup = url_status_.find(url);
101   if (site_lookup != url_status_.end())
102     site_lookup->second[action->extension_id()] |= status;
103 }
104
105 int UmaPolicy::MatchActionToStatus(scoped_refptr<Action> action) {
106   if (action->action_type() == Action::ACTION_CONTENT_SCRIPT)
107     return kContentScript;
108   if (action->action_type() == Action::ACTION_API_CALL &&
109       action->api_name() == "tabs.executeScript")
110     return kContentScript;
111   if (action->action_type() != Action::ACTION_DOM_ACCESS)
112     return kNoStatus;
113
114   int dom_verb = DomActionType::MODIFIED;
115   if (!action->other() ||
116       !action->other()->GetIntegerWithoutPathExpansion(
117           activity_log_constants::kActionDomVerb, &dom_verb))
118     return kNoStatus;
119
120   int ret_bit = kNoStatus;
121   DomActionType::Type dom_type = static_cast<DomActionType::Type>(dom_verb);
122   if (dom_type == DomActionType::GETTER)
123     return kReadDom;
124   if (dom_type == DomActionType::SETTER)
125     ret_bit |= kModifiedDom;
126   else if (dom_type == DomActionType::METHOD)
127     ret_bit |= kDomMethod;
128   else
129     return kNoStatus;
130
131   if (action->api_name() == "HTMLDocument.write" ||
132       action->api_name() == "HTMLDocument.writeln") {
133     ret_bit |= kDocumentWrite;
134   } else if (action->api_name() == "Element.innerHTML") {
135     ret_bit |= kInnerHtml;
136   } else if (action->api_name() == "Document.createElement") {
137     std::string arg;
138     action->args()->GetString(0, &arg);
139     if (arg == "script")
140       ret_bit |= kCreatedScript;
141     else if (arg == "iframe")
142       ret_bit |= kCreatedIframe;
143     else if (arg == "div")
144       ret_bit |= kCreatedDiv;
145     else if (arg == "a")
146       ret_bit |= kCreatedLink;
147     else if (arg == "input")
148       ret_bit |= kCreatedInput;
149     else if (arg == "embed")
150       ret_bit |= kCreatedEmbed;
151     else if (arg == "object")
152       ret_bit |= kCreatedObject;
153   }
154
155   const Action::InjectionType ad_injection =
156       action->DidInjectAd(g_browser_process->rappor_service());
157   switch (ad_injection) {
158     case Action::INJECTION_NEW_AD:
159       ret_bit |= kAdInjected;
160       break;
161     case Action::INJECTION_REMOVED_AD:
162       ret_bit |= kAdRemoved;
163       break;
164     case Action::INJECTION_REPLACED_AD:
165       ret_bit |= kAdReplaced;
166       break;
167     case Action::INJECTION_LIKELY_NEW_AD:
168       ret_bit |= kAdLikelyInjected;
169       break;
170     case Action::INJECTION_LIKELY_REPLACED_AD:
171       ret_bit |= kAdLikelyReplaced;
172       break;
173     case Action::NO_AD_INJECTION:
174       break;
175     case Action::NUM_INJECTION_TYPES:
176       NOTREACHED();
177   }
178
179   return ret_bit;
180 }
181
182 void UmaPolicy::HistogramOnClose(const std::string& cleaned_url,
183                                  content::WebContents* web_contents) {
184   // Let's try to avoid histogramming useless URLs.
185   if (cleaned_url.empty() || cleaned_url == url::kAboutBlankURL ||
186       cleaned_url == chrome::kChromeUINewTabURL)
187     return;
188
189   int statuses[MAX_STATUS - 1];
190   std::memset(statuses, 0, sizeof(statuses));
191
192   ActiveScriptController* active_script_controller =
193       ActiveScriptController::GetForWebContents(web_contents);
194   SiteMap::iterator site_lookup = url_status_.find(cleaned_url);
195   const ExtensionMap& exts = site_lookup->second;
196   std::set<std::string> ad_injectors;
197   for (ExtensionMap::const_iterator ext_iter = exts.begin();
198        ext_iter != exts.end();
199        ++ext_iter) {
200     if (ext_iter->first == kNumberOfTabs)
201       continue;
202     for (int i = NONE + 1; i < MAX_STATUS; ++i) {
203       if (ext_iter->second & (1 << i))
204         statuses[i-1]++;
205     }
206
207     if (ext_iter->second & kAnyAdActivity)
208       ad_injectors.insert(ext_iter->first);
209   }
210   if (active_script_controller)
211     active_script_controller->OnAdInjectionDetected(ad_injectors);
212
213   ExtensionRegistry* registry = ExtensionRegistry::Get(profile_);
214   for (std::set<std::string>::const_iterator iter = ad_injectors.begin();
215        iter != ad_injectors.end();
216        ++iter) {
217     const Extension* extension =
218         registry->GetExtensionById(*iter, ExtensionRegistry::EVERYTHING);
219     if (extension) {
220       UMA_HISTOGRAM_ENUMERATION("Extensions.AdInjection.InstallLocation",
221                                 extension->location(),
222                                 Manifest::NUM_LOCATIONS);
223     }
224   }
225
226   std::string prefix = "ExtensionActivity.";
227   if (GURL(cleaned_url).host() != "www.google.com") {
228     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
229                              statuses[CONTENT_SCRIPT - 1]);
230     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
231                              statuses[READ_DOM - 1]);
232     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
233                              statuses[MODIFIED_DOM - 1]);
234     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
235                              statuses[DOM_METHOD - 1]);
236     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
237                              statuses[DOCUMENT_WRITE - 1]);
238     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
239                              statuses[INNER_HTML - 1]);
240     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
241                              statuses[CREATED_SCRIPT - 1]);
242     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
243                              statuses[CREATED_IFRAME - 1]);
244     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
245                              statuses[CREATED_DIV - 1]);
246     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
247                              statuses[CREATED_LINK - 1]);
248     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
249                              statuses[CREATED_INPUT - 1]);
250     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
251                              statuses[CREATED_EMBED - 1]);
252     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
253                              statuses[CREATED_OBJECT - 1]);
254     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
255                              statuses[AD_INJECTED - 1]);
256     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
257                              statuses[AD_REMOVED - 1]);
258     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
259                              statuses[AD_REPLACED - 1]);
260     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED),
261                              statuses[AD_LIKELY_INJECTED - 1]);
262     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED),
263                              statuses[AD_LIKELY_REPLACED - 1]);
264   } else {
265     prefix += "Google.";
266     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
267                              statuses[CONTENT_SCRIPT - 1]);
268     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
269                              statuses[READ_DOM - 1]);
270     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
271                              statuses[MODIFIED_DOM - 1]);
272     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
273                              statuses[DOM_METHOD - 1]);
274     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
275                              statuses[DOCUMENT_WRITE - 1]);
276     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
277                              statuses[INNER_HTML - 1]);
278     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
279                              statuses[CREATED_SCRIPT - 1]);
280     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
281                              statuses[CREATED_IFRAME - 1]);
282     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
283                              statuses[CREATED_DIV - 1]);
284     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
285                              statuses[CREATED_LINK - 1]);
286     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
287                              statuses[CREATED_INPUT - 1]);
288     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
289                              statuses[CREATED_EMBED - 1]);
290     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
291                              statuses[CREATED_OBJECT - 1]);
292     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
293                              statuses[AD_INJECTED - 1]);
294     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
295                              statuses[AD_REMOVED - 1]);
296     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
297                              statuses[AD_REPLACED - 1]);
298     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED),
299                              statuses[AD_LIKELY_INJECTED - 1]);
300     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED),
301                              statuses[AD_LIKELY_REPLACED - 1]);
302   }
303 }
304
305 // Handle tab tracking. --------------------------------------------------------
306
307 void UmaPolicy::OnBrowserAdded(Browser* browser) {
308   if (!profile_->IsSameProfile(browser->profile()))
309     return;
310   browser->tab_strip_model()->AddObserver(this);
311 }
312
313 void UmaPolicy::OnBrowserRemoved(Browser* browser) {
314   if (!profile_->IsSameProfile(browser->profile()))
315     return;
316   browser->tab_strip_model()->RemoveObserver(this);
317 }
318
319 // Use the value from SessionTabHelper::IdForTab, *not* |index|. |index| will be
320 // duplicated across tabs in a session, whereas IdForTab uniquely identifies
321 // each tab.
322 void UmaPolicy::TabChangedAt(content::WebContents* contents,
323                              int index,
324                              TabChangeType change_type) {
325   if (change_type != TabStripModelObserver::LOADING_ONLY)
326     return;
327   if (!contents)
328     return;
329
330   std::string url = CleanURL(contents->GetLastCommittedURL());
331   int32 tab_id = SessionTabHelper::IdForTab(contents);
332
333   std::map<int32, std::string>::iterator tab_it = tab_list_.find(tab_id);
334
335   // Ignore tabs that haven't changed status.
336   if (tab_it != tab_list_.end() && tab_it->second == url)
337     return;
338
339   // Is this an existing tab whose URL has changed.
340   if (tab_it != tab_list_.end()) {
341     CleanupClosedPage(tab_it->second, contents);
342     tab_list_.erase(tab_id);
343   }
344
345   // Check that tab_list_ isn't over the kMaxTabsTracked budget.
346   if (tab_list_.size() >= kMaxTabsTracked)
347     return;
348
349   // Set up the new entries.
350   tab_list_[tab_id] = url;
351   SetupOpenedPage(url);
352 }
353
354 // Use the value from SessionTabHelper::IdForTab, *not* |index|. |index| will be
355 // duplicated across tabs in a session, whereas IdForTab uniquely identifies
356 // each tab.
357 void UmaPolicy::TabClosingAt(TabStripModel* tab_strip_model,
358                              content::WebContents* contents,
359                              int index) {
360   if (!contents)
361     return;
362   std::string url = CleanURL(contents->GetLastCommittedURL());
363   int32 tab_id = SessionTabHelper::IdForTab(contents);
364   std::map<int, std::string>::iterator tab_it = tab_list_.find(tab_id);
365   if (tab_it != tab_list_.end())
366     tab_list_.erase(tab_id);
367
368   CleanupClosedPage(url, contents);
369 }
370
371 void UmaPolicy::SetupOpenedPage(const std::string& url) {
372   url_status_[url][kNumberOfTabs]++;
373 }
374
375 void UmaPolicy::CleanupClosedPage(const std::string& cleaned_url,
376                                   content::WebContents* web_contents) {
377   SiteMap::iterator old_site_lookup = url_status_.find(cleaned_url);
378   if (old_site_lookup == url_status_.end())
379     return;
380   old_site_lookup->second[kNumberOfTabs]--;
381   if (old_site_lookup->second[kNumberOfTabs] == 0) {
382     HistogramOnClose(cleaned_url, web_contents);
383     url_status_.erase(cleaned_url);
384   }
385 }
386
387 // Helpers. --------------------------------------------------------------------
388
389 // We don't want to treat # ref navigations as if they were new pageloads.
390 // So we get rid of the ref if it has it.
391 // We convert to a string in the hopes that this is faster than Replacements.
392 std::string UmaPolicy::CleanURL(const GURL& gurl) {
393   if (gurl.spec().empty())
394     return GURL(url::kAboutBlankURL).spec();
395   if (!gurl.is_valid())
396     return gurl.spec();
397   if (!gurl.has_ref())
398     return gurl.spec();
399   std::string port = "";
400   if (gurl.has_port())
401     port = ":" + gurl.port();
402   std::string query = "";
403   if (gurl.has_query())
404     query = "?" + gurl.query();
405   return base::StringPrintf("%s://%s%s%s%s",
406                             gurl.scheme().c_str(),
407                             gurl.host().c_str(),
408                             port.c_str(),
409                             gurl.path().c_str(),
410                             query.c_str());
411 }
412
413 const char* UmaPolicy::GetHistogramName(PageStatus status) {
414   switch (status) {
415     case CONTENT_SCRIPT:
416       return "ContentScript";
417     case READ_DOM:
418       return "ReadDom";
419     case MODIFIED_DOM:
420       return "ModifiedDom";
421     case DOM_METHOD:
422       return "InvokedDomMethod";
423     case DOCUMENT_WRITE:
424       return "DocumentWrite";
425     case INNER_HTML:
426       return "InnerHtml";
427     case CREATED_SCRIPT:
428       return "CreatedScript";
429     case CREATED_IFRAME:
430       return "CreatedIframe";
431     case CREATED_DIV:
432       return "CreatedDiv";
433     case CREATED_LINK:
434       return "CreatedLink";
435     case CREATED_INPUT:
436       return "CreatedInput";
437     case CREATED_EMBED:
438       return "CreatedEmbed";
439     case CREATED_OBJECT:
440       return "CreatedObject";
441     case AD_INJECTED:
442       return "AdInjected";
443     case AD_REMOVED:
444       return "AdRemoved";
445     case AD_REPLACED:
446       return "AdReplaced";
447     case AD_LIKELY_INJECTED:
448       return "AdLikelyInjected";
449     case AD_LIKELY_REPLACED:
450       return "AdLikelyReplaced";
451     case NONE:
452     case MAX_STATUS:
453     default:
454       NOTREACHED();
455       return "";
456   }
457 }
458
459 }  // namespace extensions