Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / components / policy / core / browser / url_blacklist_manager.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/policy/core/browser/url_blacklist_manager.h"
6
7 #include "base/bind.h"
8 #include "base/files/file_path.h"
9 #include "base/location.h"
10 #include "base/message_loop/message_loop_proxy.h"
11 #include "base/prefs/pref_service.h"
12 #include "base/sequenced_task_runner.h"
13 #include "base/stl_util.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/task_runner_util.h"
16 #include "base/values.h"
17 #include "components/policy/core/common/policy_pref_names.h"
18 #include "components/pref_registry/pref_registry_syncable.h"
19 #include "net/base/filename_util.h"
20 #include "net/base/load_flags.h"
21 #include "net/base/net_errors.h"
22 #include "net/url_request/url_request.h"
23 #include "url/url_constants.h"
24 #include "url/url_parse.h"
25
26 using url_matcher::URLMatcher;
27 using url_matcher::URLMatcherCondition;
28 using url_matcher::URLMatcherConditionFactory;
29 using url_matcher::URLMatcherConditionSet;
30 using url_matcher::URLMatcherPortFilter;
31 using url_matcher::URLMatcherSchemeFilter;
32 using url_matcher::URLQueryElementMatcherCondition;
33
34 namespace policy {
35
36 namespace {
37
// Schemes whose URLs are exempt from the "*" wildcard entry of the blacklist.
// URLs using one of these schemes can still be blocked by a more specific
// filter, e.g. "chrome-extension://*".
// The scheme names are spelled out here to avoid dependencies on //extensions
// and //chrome.
// Both the characters and the array slots are const, so the table cannot be
// modified at runtime.
const char* const kBypassBlacklistWildcardForSchemes[] = {
  // For internal extension URLs e.g. the Bookmark Manager and the File
  // Manager on Chrome OS.
  "chrome-extension",

  // NTP on Android.
  "chrome-native",

  // NTP on other platforms.
  "chrome-search",
};
54
// Upper bound on the number of filters honoured per policy list. Entries at
// or beyond this index are ignored.
const size_t kMaxFiltersPerPolicy = 1000u;
57
58 // A task that builds the blacklist on a background thread.
59 scoped_ptr<URLBlacklist> BuildBlacklist(
60     scoped_ptr<base::ListValue> block,
61     scoped_ptr<base::ListValue> allow,
62     URLBlacklist::SegmentURLCallback segment_url) {
63   scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url));
64   blacklist->Block(block.get());
65   blacklist->Allow(allow.get());
66   return blacklist.Pass();
67 }
68
69 // Tokenise the parameter |query| and add appropriate query element matcher
70 // conditions to the |query_conditions|.
71 void ProcessQueryToConditions(
72     url_matcher::URLMatcherConditionFactory* condition_factory,
73     const std::string& query,
74     bool allow,
75     std::set<URLQueryElementMatcherCondition>* query_conditions) {
76   url::Component query_left = url::MakeRange(0, query.length());
77   url::Component key;
78   url::Component value;
79   // Depending on the filter type being black-list or white-list, the matcher
80   // choose any or every match. The idea is a URL should be black-listed if
81   // there is any occurrence of the key value pair. It should be white-listed
82   // only if every occurrence of the key is followed by the value. This avoids
83   // situations such as a user appending a white-listed video parameter in the
84   // end of the query and watching a video of his choice (the last parameter is
85   // ignored by some web servers like youtube's).
86   URLQueryElementMatcherCondition::Type match_type =
87       allow ? URLQueryElementMatcherCondition::MATCH_ALL
88             : URLQueryElementMatcherCondition::MATCH_ANY;
89
90   while (ExtractQueryKeyValue(query.data(), &query_left, &key, &value)) {
91     URLQueryElementMatcherCondition::QueryElementType query_element_type =
92         value.len ? URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY_VALUE
93                   : URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY;
94     URLQueryElementMatcherCondition::QueryValueMatchType query_value_match_type;
95     if (!value.len && key.len && query[key.end() - 1] == '*') {
96       --key.len;
97       query_value_match_type =
98           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
99     } else if (value.len && query[value.end() - 1] == '*') {
100       --value.len;
101       query_value_match_type =
102           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
103     } else {
104       query_value_match_type =
105           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_EXACT;
106     }
107     query_conditions->insert(
108         URLQueryElementMatcherCondition(query.substr(key.begin, key.len),
109                                         query.substr(value.begin, value.len),
110                                         query_value_match_type,
111                                         query_element_type,
112                                         match_type,
113                                         condition_factory));
114   }
115 }
116
117 bool BypassBlacklistWildcardForURL(const GURL& url) {
118   const std::string& scheme = url.scheme();
119   for (size_t i = 0; i < arraysize(kBypassBlacklistWildcardForSchemes); ++i) {
120     if (scheme == kBypassBlacklistWildcardForSchemes[i])
121       return true;
122   }
123   return false;
124 }
125
126 }  // namespace
127
128 struct URLBlacklist::FilterComponents {
129   FilterComponents() : port(0), match_subdomains(true), allow(true) {}
130   ~FilterComponents() {}
131
132   // Returns true if |this| represents the "*" filter in the blacklist.
133   bool IsBlacklistWildcard() const {
134     return !allow && host.empty() && scheme.empty() && path.empty() &&
135            query.empty() && port == 0 && number_of_key_value_pairs == 0 &&
136            match_subdomains;
137   }
138
139   std::string scheme;
140   std::string host;
141   uint16 port;
142   std::string path;
143   std::string query;
144   int number_of_key_value_pairs;
145   bool match_subdomains;
146   bool allow;
147 };
148
149 URLBlacklist::URLBlacklist(SegmentURLCallback segment_url)
150     : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {}
151
152 URLBlacklist::~URLBlacklist() {}
153
154 void URLBlacklist::AddFilters(bool allow,
155                               const base::ListValue* list) {
156   URLMatcherConditionSet::Vector all_conditions;
157   size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize());
158   for (size_t i = 0; i < size; ++i) {
159     std::string pattern;
160     bool success = list->GetString(i, &pattern);
161     DCHECK(success);
162     FilterComponents components;
163     components.allow = allow;
164     if (!FilterToComponents(segment_url_,
165                             pattern,
166                             &components.scheme,
167                             &components.host,
168                             &components.match_subdomains,
169                             &components.port,
170                             &components.path,
171                             &components.query)) {
172       LOG(ERROR) << "Invalid pattern " << pattern;
173       continue;
174     }
175
176     scoped_refptr<URLMatcherConditionSet> condition_set =
177         CreateConditionSet(url_matcher_.get(),
178                            ++id_,
179                            components.scheme,
180                            components.host,
181                            components.match_subdomains,
182                            components.port,
183                            components.path,
184                            components.query,
185                            allow);
186     components.number_of_key_value_pairs =
187         condition_set->query_conditions().size();
188     all_conditions.push_back(condition_set);
189     filters_[id_] = components;
190   }
191   url_matcher_->AddConditionSets(all_conditions);
192 }
193
194 void URLBlacklist::Block(const base::ListValue* filters) {
195   AddFilters(false, filters);
196 }
197
198 void URLBlacklist::Allow(const base::ListValue* filters) {
199   AddFilters(true, filters);
200 }
201
202 bool URLBlacklist::IsURLBlocked(const GURL& url) const {
203   std::set<URLMatcherConditionSet::ID> matching_ids =
204       url_matcher_->MatchURL(url);
205
206   const FilterComponents* max = NULL;
207   for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin();
208        id != matching_ids.end(); ++id) {
209     std::map<int, FilterComponents>::const_iterator it = filters_.find(*id);
210     DCHECK(it != filters_.end());
211     const FilterComponents& filter = it->second;
212     if (!max || FilterTakesPrecedence(filter, *max))
213       max = &filter;
214   }
215
216   // Default to allow.
217   if (!max)
218     return false;
219
220   // Some of the internal Chrome URLs are not affected by the "*" in the
221   // blacklist. Note that the "*" is the lowest priority filter possible, so
222   // any higher priority filter will be applied first.
223   if (max->IsBlacklistWildcard() && BypassBlacklistWildcardForURL(url))
224     return false;
225
226   return !max->allow;
227 }
228
229 size_t URLBlacklist::Size() const {
230   return filters_.size();
231 }
232
233 // static
234 bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url,
235                                       const std::string& filter,
236                                       std::string* scheme,
237                                       std::string* host,
238                                       bool* match_subdomains,
239                                       uint16* port,
240                                       std::string* path,
241                                       std::string* query) {
242   url::Parsed parsed;
243
244   if (segment_url(filter, &parsed) == url::kFileScheme) {
245     base::FilePath file_path;
246     if (!net::FileURLToFilePath(GURL(filter), &file_path))
247       return false;
248
249     *scheme = url::kFileScheme;
250     host->clear();
251     *match_subdomains = true;
252     *port = 0;
253     // Special path when the |filter| is 'file://*'.
254     *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe();
255 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
256     // Separators have to be canonicalized on Windows.
257     std::replace(path->begin(), path->end(), '\\', '/');
258     *path = "/" + *path;
259 #endif
260     return true;
261   }
262
263   if (!parsed.host.is_nonempty())
264     return false;
265
266   if (parsed.scheme.is_nonempty())
267     scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len);
268   else
269     scheme->clear();
270
271   host->assign(filter, parsed.host.begin, parsed.host.len);
272   // Special '*' host, matches all hosts.
273   if (*host == "*") {
274     host->clear();
275     *match_subdomains = true;
276   } else if ((*host)[0] == '.') {
277     // A leading dot in the pattern syntax means that we don't want to match
278     // subdomains.
279     host->erase(0, 1);
280     *match_subdomains = false;
281   } else {
282     url::RawCanonOutputT<char> output;
283     url::CanonHostInfo host_info;
284     url::CanonicalizeHostVerbose(filter.c_str(), parsed.host, &output,
285                                  &host_info);
286     if (host_info.family == url::CanonHostInfo::NEUTRAL) {
287       // We want to match subdomains. Add a dot in front to make sure we only
288       // match at domain component boundaries.
289       *host = "." + *host;
290       *match_subdomains = true;
291     } else {
292       *match_subdomains = false;
293     }
294   }
295
296   if (parsed.port.is_nonempty()) {
297     int int_port;
298     if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len),
299                            &int_port)) {
300       return false;
301     }
302     if (int_port <= 0 || int_port > kuint16max)
303       return false;
304     *port = int_port;
305   } else {
306     // Match any port.
307     *port = 0;
308   }
309
310   if (parsed.path.is_nonempty())
311     path->assign(filter, parsed.path.begin, parsed.path.len);
312   else
313     path->clear();
314
315   if (query) {
316     if (parsed.query.is_nonempty())
317       query->assign(filter, parsed.query.begin, parsed.query.len);
318     else
319       query->clear();
320   }
321
322   return true;
323 }
324
325 // static
326 scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet(
327     URLMatcher* url_matcher,
328     int id,
329     const std::string& scheme,
330     const std::string& host,
331     bool match_subdomains,
332     uint16 port,
333     const std::string& path,
334     const std::string& query,
335     bool allow) {
336   URLMatcherConditionFactory* condition_factory =
337       url_matcher->condition_factory();
338   std::set<URLMatcherCondition> conditions;
339   conditions.insert(match_subdomains ?
340       condition_factory->CreateHostSuffixPathPrefixCondition(host, path) :
341       condition_factory->CreateHostEqualsPathPrefixCondition(host, path));
342
343   std::set<URLQueryElementMatcherCondition> query_conditions;
344   if (!query.empty()) {
345     ProcessQueryToConditions(
346         condition_factory, query, allow, &query_conditions);
347   }
348
349   scoped_ptr<URLMatcherSchemeFilter> scheme_filter;
350   if (!scheme.empty())
351     scheme_filter.reset(new URLMatcherSchemeFilter(scheme));
352
353   scoped_ptr<URLMatcherPortFilter> port_filter;
354   if (port != 0) {
355     std::vector<URLMatcherPortFilter::Range> ranges;
356     ranges.push_back(URLMatcherPortFilter::CreateRange(port));
357     port_filter.reset(new URLMatcherPortFilter(ranges));
358   }
359
360   return new URLMatcherConditionSet(id,
361                                     conditions,
362                                     query_conditions,
363                                     scheme_filter.Pass(),
364                                     port_filter.Pass());
365 }
366
367 // static
368 bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs,
369                                          const FilterComponents& rhs) {
370   // The "*" wildcard is the lowest priority filter.
371   if (rhs.IsBlacklistWildcard())
372     return true;
373
374   if (lhs.match_subdomains && !rhs.match_subdomains)
375     return false;
376   if (!lhs.match_subdomains && rhs.match_subdomains)
377     return true;
378
379   size_t host_length = lhs.host.length();
380   size_t other_host_length = rhs.host.length();
381   if (host_length != other_host_length)
382     return host_length > other_host_length;
383
384   size_t path_length = lhs.path.length();
385   size_t other_path_length = rhs.path.length();
386   if (path_length != other_path_length)
387     return path_length > other_path_length;
388
389   if (lhs.number_of_key_value_pairs != rhs.number_of_key_value_pairs)
390     return lhs.number_of_key_value_pairs > rhs.number_of_key_value_pairs;
391
392   if (lhs.allow && !rhs.allow)
393     return true;
394
395   return false;
396 }
397
398 URLBlacklistManager::URLBlacklistManager(
399     PrefService* pref_service,
400     const scoped_refptr<base::SequencedTaskRunner>& background_task_runner,
401     const scoped_refptr<base::SequencedTaskRunner>& io_task_runner,
402     URLBlacklist::SegmentURLCallback segment_url,
403     OverrideBlacklistCallback override_blacklist)
404     : pref_service_(pref_service),
405       background_task_runner_(background_task_runner),
406       io_task_runner_(io_task_runner),
407       segment_url_(segment_url),
408       override_blacklist_(override_blacklist),
409       ui_task_runner_(base::MessageLoopProxy::current()),
410       blacklist_(new URLBlacklist(segment_url)),
411       ui_weak_ptr_factory_(this),
412       io_weak_ptr_factory_(this) {
413   pref_change_registrar_.Init(pref_service_);
414   base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate,
415                                       base::Unretained(this));
416   pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback);
417   pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback);
418
419   // Start enforcing the policies without a delay when they are present at
420   // startup.
421   if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist))
422     Update();
423 }
424
425 void URLBlacklistManager::ShutdownOnUIThread() {
426   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
427   // Cancel any pending updates, and stop listening for pref change updates.
428   ui_weak_ptr_factory_.InvalidateWeakPtrs();
429   pref_change_registrar_.RemoveAll();
430 }
431
432 URLBlacklistManager::~URLBlacklistManager() {
433 }
434
435 void URLBlacklistManager::ScheduleUpdate() {
436   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
437   // Cancel pending updates, if any. This can happen if two preferences that
438   // change the blacklist are updated in one message loop cycle. In those cases,
439   // only rebuild the blacklist after all the preference updates are processed.
440   ui_weak_ptr_factory_.InvalidateWeakPtrs();
441   ui_task_runner_->PostTask(
442       FROM_HERE,
443       base::Bind(&URLBlacklistManager::Update,
444                  ui_weak_ptr_factory_.GetWeakPtr()));
445 }
446
447 void URLBlacklistManager::Update() {
448   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
449
450   // The preferences can only be read on the UI thread.
451   scoped_ptr<base::ListValue> block(
452       pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy());
453   scoped_ptr<base::ListValue> allow(
454       pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy());
455
456   // Go through the IO thread to grab a WeakPtr to |this|. This is safe from
457   // here, since this task will always execute before a potential deletion of
458   // ProfileIOData on IO.
459   io_task_runner_->PostTask(FROM_HERE,
460                             base::Bind(&URLBlacklistManager::UpdateOnIO,
461                                        base::Unretained(this),
462                                        base::Passed(&block),
463                                        base::Passed(&allow)));
464 }
465
466 void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block,
467                                      scoped_ptr<base::ListValue> allow) {
468   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
469   // The URLBlacklist is built on a worker thread. Once it's ready, it is passed
470   // to the URLBlacklistManager on IO.
471   base::PostTaskAndReplyWithResult(
472       background_task_runner_.get(),
473       FROM_HERE,
474       base::Bind(&BuildBlacklist,
475                  base::Passed(&block),
476                  base::Passed(&allow),
477                  segment_url_),
478       base::Bind(&URLBlacklistManager::SetBlacklist,
479                  io_weak_ptr_factory_.GetWeakPtr()));
480 }
481
482 void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) {
483   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
484   blacklist_ = blacklist.Pass();
485 }
486
487 bool URLBlacklistManager::IsURLBlocked(const GURL& url) const {
488   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
489   return blacklist_->IsURLBlocked(url);
490 }
491
492 bool URLBlacklistManager::IsRequestBlocked(
493     const net::URLRequest& request, int* reason) const {
494   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
495 #if !defined(OS_IOS)
496   // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283
497   int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME;
498   if ((request.load_flags() & filter_flags) == 0)
499     return false;
500 #endif
501
502   bool block = false;
503   if (override_blacklist_.Run(request.url(), &block, reason))
504     return block;
505
506   *reason = net::ERR_BLOCKED_BY_ADMINISTRATOR;
507   return IsURLBlocked(request.url());
508 }
509
510 // static
511 void URLBlacklistManager::RegisterProfilePrefs(
512     user_prefs::PrefRegistrySyncable* registry) {
513   registry->RegisterListPref(policy_prefs::kUrlBlacklist,
514                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
515   registry->RegisterListPref(policy_prefs::kUrlWhitelist,
516                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
517 }
518
519 }  // namespace policy