Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / components / policy / core / browser / url_blacklist_manager.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/policy/core/browser/url_blacklist_manager.h"
6
7 #include "base/bind.h"
8 #include "base/files/file_path.h"
9 #include "base/location.h"
10 #include "base/message_loop/message_loop_proxy.h"
11 #include "base/prefs/pref_service.h"
12 #include "base/sequenced_task_runner.h"
13 #include "base/stl_util.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/task_runner_util.h"
16 #include "base/values.h"
17 #include "components/policy/core/common/policy_pref_names.h"
18 #include "components/user_prefs/pref_registry_syncable.h"
19 #include "net/base/filename_util.h"
20 #include "net/base/load_flags.h"
21 #include "net/base/net_errors.h"
22 #include "net/url_request/url_request.h"
23 #include "url/url_parse.h"
24
25 using url_matcher::URLMatcher;
26 using url_matcher::URLMatcherCondition;
27 using url_matcher::URLMatcherConditionFactory;
28 using url_matcher::URLMatcherConditionSet;
29 using url_matcher::URLMatcherPortFilter;
30 using url_matcher::URLMatcherSchemeFilter;
31 using url_matcher::URLQueryElementMatcherCondition;
32
33 namespace policy {
34
35 namespace {
36
37 const char kFileScheme[] = "file";
38
39 // Maximum filters per policy. Filters over this index are ignored.
40 const size_t kMaxFiltersPerPolicy = 1000;
41
42 // A task that builds the blacklist on a background thread.
43 scoped_ptr<URLBlacklist> BuildBlacklist(
44     scoped_ptr<base::ListValue> block,
45     scoped_ptr<base::ListValue> allow,
46     URLBlacklist::SegmentURLCallback segment_url) {
47   scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url));
48   blacklist->Block(block.get());
49   blacklist->Allow(allow.get());
50   return blacklist.Pass();
51 }
52
53 // Tokenise the parameter |query| and add appropriate query element matcher
54 // conditions to the |query_conditions|.
55 void ProcessQueryToConditions(
56     url_matcher::URLMatcherConditionFactory* condition_factory,
57     const std::string& query,
58     bool allow,
59     std::set<URLQueryElementMatcherCondition>* query_conditions) {
60   url::Component query_left = url::MakeRange(0, query.length());
61   url::Component key;
62   url::Component value;
63   // Depending on the filter type being black-list or white-list, the matcher
64   // choose any or every match. The idea is a URL should be black-listed if
65   // there is any occurrence of the key value pair. It should be white-listed
66   // only if every occurrence of the key is followed by the value. This avoids
67   // situations such as a user appending a white-listed video parameter in the
68   // end of the query and watching a video of his choice (the last parameter is
69   // ignored by some web servers like youtube's).
70   URLQueryElementMatcherCondition::Type match_type =
71       allow ? URLQueryElementMatcherCondition::MATCH_ALL
72             : URLQueryElementMatcherCondition::MATCH_ANY;
73
74   while (ExtractQueryKeyValue(query.data(), &query_left, &key, &value)) {
75     URLQueryElementMatcherCondition::QueryElementType query_element_type =
76         value.len ? URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY_VALUE
77                   : URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY;
78     URLQueryElementMatcherCondition::QueryValueMatchType query_value_match_type;
79     if (!value.len && key.len && query[key.end() - 1] == '*') {
80       --key.len;
81       query_value_match_type =
82           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
83     } else if (value.len && query[value.end() - 1] == '*') {
84       --value.len;
85       query_value_match_type =
86           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
87     } else {
88       query_value_match_type =
89           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_EXACT;
90     }
91     query_conditions->insert(
92         URLQueryElementMatcherCondition(query.substr(key.begin, key.len),
93                                         query.substr(value.begin, value.len),
94                                         query_value_match_type,
95                                         query_element_type,
96                                         match_type,
97                                         condition_factory));
98   }
99 }
100
101 }  // namespace
102
103 struct URLBlacklist::FilterComponents {
104   FilterComponents() : port(0), match_subdomains(true), allow(true) {}
105   ~FilterComponents() {}
106
107   std::string scheme;
108   std::string host;
109   uint16 port;
110   std::string path;
111   std::string query;
112   int number_of_key_value_pairs;
113   bool match_subdomains;
114   bool allow;
115 };
116
117 URLBlacklist::URLBlacklist(SegmentURLCallback segment_url)
118     : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {}
119
120 URLBlacklist::~URLBlacklist() {}
121
122 void URLBlacklist::AddFilters(bool allow,
123                               const base::ListValue* list) {
124   URLMatcherConditionSet::Vector all_conditions;
125   size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize());
126   for (size_t i = 0; i < size; ++i) {
127     std::string pattern;
128     bool success = list->GetString(i, &pattern);
129     DCHECK(success);
130     FilterComponents components;
131     components.allow = allow;
132     if (!FilterToComponents(segment_url_,
133                             pattern,
134                             &components.scheme,
135                             &components.host,
136                             &components.match_subdomains,
137                             &components.port,
138                             &components.path,
139                             &components.query)) {
140       LOG(ERROR) << "Invalid pattern " << pattern;
141       continue;
142     }
143
144     scoped_refptr<URLMatcherConditionSet> condition_set =
145         CreateConditionSet(url_matcher_.get(),
146                            ++id_,
147                            components.scheme,
148                            components.host,
149                            components.match_subdomains,
150                            components.port,
151                            components.path,
152                            components.query,
153                            allow);
154     components.number_of_key_value_pairs =
155         condition_set->query_conditions().size();
156     all_conditions.push_back(condition_set);
157     filters_[id_] = components;
158   }
159   url_matcher_->AddConditionSets(all_conditions);
160 }
161
162 void URLBlacklist::Block(const base::ListValue* filters) {
163   AddFilters(false, filters);
164 }
165
166 void URLBlacklist::Allow(const base::ListValue* filters) {
167   AddFilters(true, filters);
168 }
169
170 bool URLBlacklist::IsURLBlocked(const GURL& url) const {
171   std::set<URLMatcherConditionSet::ID> matching_ids =
172       url_matcher_->MatchURL(url);
173
174   const FilterComponents* max = NULL;
175   for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin();
176        id != matching_ids.end(); ++id) {
177     std::map<int, FilterComponents>::const_iterator it = filters_.find(*id);
178     DCHECK(it != filters_.end());
179     const FilterComponents& filter = it->second;
180     if (!max || FilterTakesPrecedence(filter, *max))
181       max = &filter;
182   }
183
184   // Default to allow.
185   if (!max)
186     return false;
187
188   return !max->allow;
189 }
190
191 size_t URLBlacklist::Size() const {
192   return filters_.size();
193 }
194
195 // static
196 bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url,
197                                       const std::string& filter,
198                                       std::string* scheme,
199                                       std::string* host,
200                                       bool* match_subdomains,
201                                       uint16* port,
202                                       std::string* path,
203                                       std::string* query) {
204   url::Parsed parsed;
205
206   if (segment_url(filter, &parsed) == kFileScheme) {
207     base::FilePath file_path;
208     if (!net::FileURLToFilePath(GURL(filter), &file_path))
209       return false;
210
211     *scheme = kFileScheme;
212     host->clear();
213     *match_subdomains = true;
214     *port = 0;
215     // Special path when the |filter| is 'file://*'.
216     *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe();
217 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
218     // Separators have to be canonicalized on Windows.
219     std::replace(path->begin(), path->end(), '\\', '/');
220     *path = "/" + *path;
221 #endif
222     return true;
223   }
224
225   if (!parsed.host.is_nonempty())
226     return false;
227
228   if (parsed.scheme.is_nonempty())
229     scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len);
230   else
231     scheme->clear();
232
233   host->assign(filter, parsed.host.begin, parsed.host.len);
234   // Special '*' host, matches all hosts.
235   if (*host == "*") {
236     host->clear();
237     *match_subdomains = true;
238   } else if ((*host)[0] == '.') {
239     // A leading dot in the pattern syntax means that we don't want to match
240     // subdomains.
241     host->erase(0, 1);
242     *match_subdomains = false;
243   } else {
244     url::RawCanonOutputT<char> output;
245     url::CanonHostInfo host_info;
246     url::CanonicalizeHostVerbose(filter.c_str(), parsed.host, &output,
247                                  &host_info);
248     if (host_info.family == url::CanonHostInfo::NEUTRAL) {
249       // We want to match subdomains. Add a dot in front to make sure we only
250       // match at domain component boundaries.
251       *host = "." + *host;
252       *match_subdomains = true;
253     } else {
254       *match_subdomains = false;
255     }
256   }
257
258   if (parsed.port.is_nonempty()) {
259     int int_port;
260     if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len),
261                            &int_port)) {
262       return false;
263     }
264     if (int_port <= 0 || int_port > kuint16max)
265       return false;
266     *port = int_port;
267   } else {
268     // Match any port.
269     *port = 0;
270   }
271
272   if (parsed.path.is_nonempty())
273     path->assign(filter, parsed.path.begin, parsed.path.len);
274   else
275     path->clear();
276
277   if (query) {
278     if (parsed.query.is_nonempty())
279       query->assign(filter, parsed.query.begin, parsed.query.len);
280     else
281       query->clear();
282   }
283
284   return true;
285 }
286
287 // static
288 scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet(
289     URLMatcher* url_matcher,
290     int id,
291     const std::string& scheme,
292     const std::string& host,
293     bool match_subdomains,
294     uint16 port,
295     const std::string& path,
296     const std::string& query,
297     bool allow) {
298   URLMatcherConditionFactory* condition_factory =
299       url_matcher->condition_factory();
300   std::set<URLMatcherCondition> conditions;
301   conditions.insert(match_subdomains ?
302       condition_factory->CreateHostSuffixPathPrefixCondition(host, path) :
303       condition_factory->CreateHostEqualsPathPrefixCondition(host, path));
304
305   std::set<URLQueryElementMatcherCondition> query_conditions;
306   if (!query.empty()) {
307     ProcessQueryToConditions(
308         condition_factory, query, allow, &query_conditions);
309   }
310
311   scoped_ptr<URLMatcherSchemeFilter> scheme_filter;
312   if (!scheme.empty())
313     scheme_filter.reset(new URLMatcherSchemeFilter(scheme));
314
315   scoped_ptr<URLMatcherPortFilter> port_filter;
316   if (port != 0) {
317     std::vector<URLMatcherPortFilter::Range> ranges;
318     ranges.push_back(URLMatcherPortFilter::CreateRange(port));
319     port_filter.reset(new URLMatcherPortFilter(ranges));
320   }
321
322   return new URLMatcherConditionSet(id,
323                                     conditions,
324                                     query_conditions,
325                                     scheme_filter.Pass(),
326                                     port_filter.Pass());
327 }
328
329 // static
330 bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs,
331                                          const FilterComponents& rhs) {
332   if (lhs.match_subdomains && !rhs.match_subdomains)
333     return false;
334   if (!lhs.match_subdomains && rhs.match_subdomains)
335     return true;
336
337   size_t host_length = lhs.host.length();
338   size_t other_host_length = rhs.host.length();
339   if (host_length != other_host_length)
340     return host_length > other_host_length;
341
342   size_t path_length = lhs.path.length();
343   size_t other_path_length = rhs.path.length();
344   if (path_length != other_path_length)
345     return path_length > other_path_length;
346
347   if (lhs.number_of_key_value_pairs != rhs.number_of_key_value_pairs)
348     return lhs.number_of_key_value_pairs > rhs.number_of_key_value_pairs;
349
350   if (lhs.allow && !rhs.allow)
351     return true;
352
353   return false;
354 }
355
356 URLBlacklistManager::URLBlacklistManager(
357     PrefService* pref_service,
358     const scoped_refptr<base::SequencedTaskRunner>& background_task_runner,
359     const scoped_refptr<base::SequencedTaskRunner>& io_task_runner,
360     URLBlacklist::SegmentURLCallback segment_url,
361     OverrideBlacklistCallback override_blacklist)
362     : ui_weak_ptr_factory_(this),
363       pref_service_(pref_service),
364       background_task_runner_(background_task_runner),
365       io_task_runner_(io_task_runner),
366       segment_url_(segment_url),
367       override_blacklist_(override_blacklist),
368       io_weak_ptr_factory_(this),
369       ui_task_runner_(base::MessageLoopProxy::current()),
370       blacklist_(new URLBlacklist(segment_url)) {
371   pref_change_registrar_.Init(pref_service_);
372   base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate,
373                                       base::Unretained(this));
374   pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback);
375   pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback);
376
377   // Start enforcing the policies without a delay when they are present at
378   // startup.
379   if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist))
380     Update();
381 }
382
383 void URLBlacklistManager::ShutdownOnUIThread() {
384   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
385   // Cancel any pending updates, and stop listening for pref change updates.
386   ui_weak_ptr_factory_.InvalidateWeakPtrs();
387   pref_change_registrar_.RemoveAll();
388 }
389
390 URLBlacklistManager::~URLBlacklistManager() {
391 }
392
393 void URLBlacklistManager::ScheduleUpdate() {
394   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
395   // Cancel pending updates, if any. This can happen if two preferences that
396   // change the blacklist are updated in one message loop cycle. In those cases,
397   // only rebuild the blacklist after all the preference updates are processed.
398   ui_weak_ptr_factory_.InvalidateWeakPtrs();
399   ui_task_runner_->PostTask(
400       FROM_HERE,
401       base::Bind(&URLBlacklistManager::Update,
402                  ui_weak_ptr_factory_.GetWeakPtr()));
403 }
404
405 void URLBlacklistManager::Update() {
406   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
407
408   // The preferences can only be read on the UI thread.
409   scoped_ptr<base::ListValue> block(
410       pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy());
411   scoped_ptr<base::ListValue> allow(
412       pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy());
413
414   // Go through the IO thread to grab a WeakPtr to |this|. This is safe from
415   // here, since this task will always execute before a potential deletion of
416   // ProfileIOData on IO.
417   io_task_runner_->PostTask(FROM_HERE,
418                             base::Bind(&URLBlacklistManager::UpdateOnIO,
419                                        base::Unretained(this),
420                                        base::Passed(&block),
421                                        base::Passed(&allow)));
422 }
423
424 void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block,
425                                      scoped_ptr<base::ListValue> allow) {
426   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
427   // The URLBlacklist is built on a worker thread. Once it's ready, it is passed
428   // to the URLBlacklistManager on IO.
429   base::PostTaskAndReplyWithResult(
430       background_task_runner_,
431       FROM_HERE,
432       base::Bind(&BuildBlacklist,
433                  base::Passed(&block),
434                  base::Passed(&allow),
435                  segment_url_),
436       base::Bind(&URLBlacklistManager::SetBlacklist,
437                  io_weak_ptr_factory_.GetWeakPtr()));
438 }
439
440 void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) {
441   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
442   blacklist_ = blacklist.Pass();
443 }
444
445 bool URLBlacklistManager::IsURLBlocked(const GURL& url) const {
446   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
447   return blacklist_->IsURLBlocked(url);
448 }
449
450 bool URLBlacklistManager::IsRequestBlocked(
451     const net::URLRequest& request, int* reason) const {
452   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
453 #if !defined(OS_IOS)
454   // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283
455   int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME;
456   if ((request.load_flags() & filter_flags) == 0)
457     return false;
458 #endif
459
460   bool block = false;
461   if (override_blacklist_.Run(request.url(), &block, reason))
462     return block;
463
464   *reason = net::ERR_BLOCKED_BY_ADMINISTRATOR;
465   return IsURLBlocked(request.url());
466 }
467
468 // static
469 void URLBlacklistManager::RegisterProfilePrefs(
470     user_prefs::PrefRegistrySyncable* registry) {
471   registry->RegisterListPref(policy_prefs::kUrlBlacklist,
472                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
473   registry->RegisterListPref(policy_prefs::kUrlWhitelist,
474                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
475 }
476
477 }  // namespace policy