Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / extensions / activity_log / uma_policy.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/extensions/activity_log/uma_policy.h"
6
7 #include "base/metrics/histogram.h"
8 #include "base/strings/stringprintf.h"
9 #include "chrome/browser/browser_process.h"
10 #include "chrome/browser/extensions/activity_log/activity_action_constants.h"
11 #include "chrome/browser/extensions/activity_log/ad_network_database.h"
12 #include "chrome/browser/sessions/session_id.h"
13 #include "chrome/browser/ui/browser.h"
14 #include "chrome/browser/ui/browser_list.h"
15 #include "chrome/browser/ui/tabs/tab_strip_model.h"
16 #include "chrome/common/url_constants.h"
17 #include "content/public/browser/web_contents.h"
18 #include "content/public/common/url_constants.h"
19 #include "extensions/common/dom_action_types.h"
20
21 namespace extensions {
22
23 namespace {
24
25 // For convenience.
26 const int kNoStatus           = UmaPolicy::NONE;
27 const int kContentScript      = 1 << UmaPolicy::CONTENT_SCRIPT;
28 const int kReadDom            = 1 << UmaPolicy::READ_DOM;
29 const int kModifiedDom        = 1 << UmaPolicy::MODIFIED_DOM;
30 const int kDomMethod          = 1 << UmaPolicy::DOM_METHOD;
31 const int kDocumentWrite      = 1 << UmaPolicy::DOCUMENT_WRITE;
32 const int kInnerHtml          = 1 << UmaPolicy::INNER_HTML;
33 const int kCreatedScript      = 1 << UmaPolicy::CREATED_SCRIPT;
34 const int kCreatedIframe      = 1 << UmaPolicy::CREATED_IFRAME;
35 const int kCreatedDiv         = 1 << UmaPolicy::CREATED_DIV;
36 const int kCreatedLink        = 1 << UmaPolicy::CREATED_LINK;
37 const int kCreatedInput       = 1 << UmaPolicy::CREATED_INPUT;
38 const int kCreatedEmbed       = 1 << UmaPolicy::CREATED_EMBED;
39 const int kCreatedObject      = 1 << UmaPolicy::CREATED_OBJECT;
40 const int kAdInjected         = 1 << UmaPolicy::AD_INJECTED;
41 const int kAdRemoved          = 1 << UmaPolicy::AD_REMOVED;
42 const int kAdReplaced         = 1 << UmaPolicy::AD_REPLACED;
43
44 }  // namespace
45
46 // Class constants, also used in testing. --------------------------------------
47
48 const char UmaPolicy::kNumberOfTabs[]       = "num_tabs";
49 const size_t UmaPolicy::kMaxTabsTracked     = 50;
50
51 // Setup and shutdown. ---------------------------------------------------------
52
53 UmaPolicy::UmaPolicy(Profile* profile)
54     : ActivityLogPolicy(profile), profile_(profile) {
55   DCHECK(!profile->IsOffTheRecord());
56   BrowserList::AddObserver(this);
57 }
58
59 UmaPolicy::~UmaPolicy() {
60   BrowserList::RemoveObserver(this);
61 }
62
63 // Unlike the other policies, UmaPolicy can commit suicide directly because it
64 // doesn't have a dependency on a database.
65 void UmaPolicy::Close() {
66   delete this;
67 }
68
69 // Process actions. ------------------------------------------------------------
70
71 void UmaPolicy::ProcessAction(scoped_refptr<Action> action) {
72   if (!action->page_url().is_valid() && !action->arg_url().is_valid())
73     return;
74   if (action->page_incognito() || action->arg_incognito())
75     return;
76   std::string url;
77   int status = MatchActionToStatus(action);
78   if (action->page_url().is_valid()) {
79     url = CleanURL(action->page_url());
80   } else if (status & kContentScript) {
81     // This is for the tabs.executeScript case.
82     url = CleanURL(action->arg_url());
83   }
84   if (url.empty())
85     return;
86
87   SiteMap::iterator site_lookup = url_status_.find(url);
88   if (site_lookup != url_status_.end())
89     site_lookup->second[action->extension_id()] |= status;
90 }
91
92 int UmaPolicy::MatchActionToStatus(scoped_refptr<Action> action) {
93   if (action->action_type() == Action::ACTION_CONTENT_SCRIPT) {
94     return kContentScript;
95   } else if (action->action_type() == Action::ACTION_API_CALL &&
96              action->api_name() == "tabs.executeScript") {
97     return kContentScript;
98   } else if (action->action_type() != Action::ACTION_DOM_ACCESS) {
99     return kNoStatus;
100   }
101
102   int dom_verb;
103   if (!action->other() ||
104       !action->other()->GetIntegerWithoutPathExpansion(
105           activity_log_constants::kActionDomVerb, &dom_verb)) {
106     return kNoStatus;
107   }
108
109   int ret_bit = kNoStatus;
110   DomActionType::Type dom_type = static_cast<DomActionType::Type>(dom_verb);
111   if (dom_type == DomActionType::GETTER)
112     return kReadDom;
113   if (dom_type == DomActionType::SETTER) {
114     ret_bit |= kModifiedDom;
115   } else if (dom_type == DomActionType::METHOD) {
116     ret_bit |= kDomMethod;
117   } else {
118     return kNoStatus;
119   }
120
121   if (action->api_name() == "HTMLDocument.write" ||
122       action->api_name() == "HTMLDocument.writeln") {
123     ret_bit |= kDocumentWrite;
124   } else if (action->api_name() == "Element.innerHTML") {
125     ret_bit |= kInnerHtml;
126   } else if (action->api_name() == "Document.createElement") {
127     std::string arg;
128     action->args()->GetString(0, &arg);
129     if (arg == "script") {
130       ret_bit |= kCreatedScript;
131     } else if (arg == "iframe") {
132       ret_bit |= kCreatedIframe;
133     } else if (arg == "div") {
134       ret_bit |= kCreatedDiv;
135     } else if (arg == "a") {
136       ret_bit |= kCreatedLink;
137     } else if (arg == "input") {
138       ret_bit |= kCreatedInput;
139     } else if (arg == "embed") {
140       ret_bit |= kCreatedEmbed;
141     } else if (arg == "object") {
142       ret_bit |= kCreatedObject;
143     }
144   }
145
146   const Action::InjectionType ad_injection =
147       action->DidInjectAd(g_browser_process->rappor_service());
148   switch (ad_injection) {
149     case Action::INJECTION_NEW_AD:
150       ret_bit |= kAdInjected;
151       break;
152     case Action::INJECTION_REMOVED_AD:
153       ret_bit |= kAdRemoved;
154       break;
155     case Action::INJECTION_REPLACED_AD:
156       ret_bit |= kAdReplaced;
157       break;
158     case Action::NO_AD_INJECTION:
159       break;
160     case Action::NUM_INJECTION_TYPES:
161       NOTREACHED();
162   };
163
164   return ret_bit;
165 }
166
167 void UmaPolicy::HistogramOnClose(const std::string& url) {
168   // Let's try to avoid histogramming useless URLs.
169   if (url.empty() || url == content::kAboutBlankURL ||
170       url == chrome::kChromeUINewTabURL)
171     return;
172
173   int statuses[MAX_STATUS - 1];
174   std::memset(statuses, 0, sizeof(statuses));
175
176   SiteMap::iterator site_lookup = url_status_.find(url);
177   ExtensionMap exts = site_lookup->second;
178   ExtensionMap::iterator ext_iter;
179   for (ext_iter = exts.begin(); ext_iter != exts.end(); ++ext_iter) {
180     if (ext_iter->first == kNumberOfTabs)
181       continue;
182     for (int i = NONE + 1; i < MAX_STATUS; ++i) {
183       if (ext_iter->second & (1 << i))
184         statuses[i-1]++;
185     }
186   }
187
188   std::string prefix = "ExtensionActivity.";
189   if (GURL(url).host() != "www.google.com") {
190     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
191                              statuses[CONTENT_SCRIPT - 1]);
192     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
193                              statuses[READ_DOM - 1]);
194     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
195                              statuses[MODIFIED_DOM - 1]);
196     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
197                              statuses[DOM_METHOD - 1]);
198     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
199                              statuses[DOCUMENT_WRITE - 1]);
200     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
201                              statuses[INNER_HTML - 1]);
202     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
203                              statuses[CREATED_SCRIPT - 1]);
204     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
205                              statuses[CREATED_IFRAME - 1]);
206     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
207                              statuses[CREATED_DIV - 1]);
208     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
209                              statuses[CREATED_LINK - 1]);
210     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
211                              statuses[CREATED_INPUT - 1]);
212     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
213                              statuses[CREATED_EMBED - 1]);
214     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
215                              statuses[CREATED_OBJECT - 1]);
216     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
217                              statuses[AD_INJECTED - 1]);
218     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
219                              statuses[AD_REMOVED - 1]);
220     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
221                              statuses[AD_REPLACED - 1]);
222   } else {
223     prefix += "Google.";
224     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
225                              statuses[CONTENT_SCRIPT - 1]);
226     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
227                              statuses[READ_DOM - 1]);
228     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
229                              statuses[MODIFIED_DOM - 1]);
230     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
231                              statuses[DOM_METHOD - 1]);
232     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
233                              statuses[DOCUMENT_WRITE - 1]);
234     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
235                              statuses[INNER_HTML - 1]);
236     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
237                              statuses[CREATED_SCRIPT - 1]);
238     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
239                              statuses[CREATED_IFRAME - 1]);
240     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
241                              statuses[CREATED_DIV - 1]);
242     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
243                              statuses[CREATED_LINK - 1]);
244     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
245                              statuses[CREATED_INPUT - 1]);
246     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
247                              statuses[CREATED_EMBED - 1]);
248     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
249                              statuses[CREATED_OBJECT - 1]);
250     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
251                              statuses[AD_INJECTED - 1]);
252     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
253                              statuses[AD_REMOVED - 1]);
254     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
255                              statuses[AD_REPLACED - 1]);
256   }
257 }
258
259 // Handle tab tracking. --------------------------------------------------------
260
261 void UmaPolicy::OnBrowserAdded(Browser* browser) {
262   if (!profile_->IsSameProfile(browser->profile()))
263     return;
264   browser->tab_strip_model()->AddObserver(this);
265 }
266
267 void UmaPolicy::OnBrowserRemoved(Browser* browser) {
268   if (!profile_->IsSameProfile(browser->profile()))
269     return;
270   browser->tab_strip_model()->RemoveObserver(this);
271 }
272
273 // Use the value from SessionID::IdForTab, *not* |index|. |index| will be
274 // duplicated across tabs in a session, whereas IdForTab uniquely identifies
275 // each tab.
276 void UmaPolicy::TabChangedAt(content::WebContents* contents,
277                              int index,
278                              TabChangeType change_type) {
279   if (change_type != TabStripModelObserver::LOADING_ONLY)
280     return;
281   if (!contents)
282     return;
283
284   std::string url = CleanURL(contents->GetLastCommittedURL());
285   int32 tab_id = SessionID::IdForTab(contents);
286
287   std::map<int32, std::string>::iterator tab_it = tab_list_.find(tab_id);
288
289   // Ignore tabs that haven't changed status.
290   if (tab_it != tab_list_.end() && tab_it->second == url)
291     return;
292
293   // Is this an existing tab whose URL has changed.
294   if (tab_it != tab_list_.end()) {
295     CleanupClosedPage(tab_it->second);
296     tab_list_.erase(tab_id);
297   }
298
299   // Check that tab_list_ isn't over the kMaxTabsTracked budget.
300   if (tab_list_.size() >= kMaxTabsTracked)
301     return;
302
303   // Set up the new entries.
304   tab_list_[tab_id] = url;
305   SetupOpenedPage(url);
306 }
307
308 // Use the value from SessionID::IdForTab, *not* |index|. |index| will be
309 // duplicated across tabs in a session, whereas IdForTab uniquely identifies
310 // each tab.
311 void UmaPolicy::TabClosingAt(TabStripModel* tab_strip_model,
312                              content::WebContents* contents,
313                              int index) {
314   if (!contents)
315     return;
316   std::string url = CleanURL(contents->GetLastCommittedURL());
317   int32 tab_id = SessionID::IdForTab(contents);
318   std::map<int, std::string>::iterator tab_it = tab_list_.find(tab_id);
319   if (tab_it != tab_list_.end())
320     tab_list_.erase(tab_id);
321
322   CleanupClosedPage(url);
323 }
324
325 void UmaPolicy::SetupOpenedPage(const std::string& url) {
326   url_status_[url][kNumberOfTabs]++;
327 }
328
329 void UmaPolicy::CleanupClosedPage(const std::string& url) {
330   SiteMap::iterator old_site_lookup = url_status_.find(url);
331   if (old_site_lookup == url_status_.end())
332     return;
333   old_site_lookup->second[kNumberOfTabs]--;
334   if (old_site_lookup->second[kNumberOfTabs] == 0) {
335     HistogramOnClose(url);
336     url_status_.erase(url);
337   }
338 }
339
340 // Helpers. --------------------------------------------------------------------
341
342 // We don't want to treat # ref navigations as if they were new pageloads.
343 // So we get rid of the ref if it has it.
344 // We convert to a string in the hopes that this is faster than Replacements.
345 std::string UmaPolicy::CleanURL(const GURL& gurl) {
346   if (gurl.spec().empty())
347     return GURL(content::kAboutBlankURL).spec();
348   if (!gurl.is_valid())
349     return gurl.spec();
350   if (!gurl.has_ref())
351     return gurl.spec();
352   std::string port = "";
353   if (gurl.has_port())
354     port = ":" + gurl.port();
355   std::string query = "";
356   if (gurl.has_query())
357     query = "?" + gurl.query();
358   return base::StringPrintf("%s://%s%s%s%s",
359                             gurl.scheme().c_str(),
360                             gurl.host().c_str(),
361                             port.c_str(),
362                             gurl.path().c_str(),
363                             query.c_str());
364 }
365
366 const char* UmaPolicy::GetHistogramName(PageStatus status) {
367   switch (status) {
368     case CONTENT_SCRIPT:
369       return "ContentScript";
370     case READ_DOM:
371       return "ReadDom";
372     case MODIFIED_DOM:
373       return "ModifiedDom";
374     case DOM_METHOD:
375       return "InvokedDomMethod";
376     case DOCUMENT_WRITE:
377       return "DocumentWrite";
378     case INNER_HTML:
379       return "InnerHtml";
380     case CREATED_SCRIPT:
381       return "CreatedScript";
382     case CREATED_IFRAME:
383       return "CreatedIframe";
384     case CREATED_DIV:
385       return "CreatedDiv";
386     case CREATED_LINK:
387       return "CreatedLink";
388     case CREATED_INPUT:
389       return "CreatedInput";
390     case CREATED_EMBED:
391       return "CreatedEmbed";
392     case CREATED_OBJECT:
393       return "CreatedObject";
394     case AD_INJECTED:
395       return "AdInjected";
396     case AD_REMOVED:
397       return "AdRemoved";
398     case AD_REPLACED:
399       return "AdReplaced";
400     case NONE:
401     case MAX_STATUS:
402     default:
403       NOTREACHED();
404       return "";
405   }
406 }
407
408 }  // namespace extensions