1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/policy/core/browser/url_blacklist_manager.h"
8 #include "base/files/file_path.h"
9 #include "base/location.h"
10 #include "base/message_loop/message_loop_proxy.h"
11 #include "base/prefs/pref_service.h"
12 #include "base/sequenced_task_runner.h"
13 #include "base/stl_util.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/task_runner_util.h"
16 #include "base/values.h"
17 #include "components/policy/core/common/policy_pref_names.h"
18 #include "components/pref_registry/pref_registry_syncable.h"
19 #include "net/base/filename_util.h"
20 #include "net/base/load_flags.h"
21 #include "net/base/net_errors.h"
22 #include "net/url_request/url_request.h"
23 #include "url/url_constants.h"
24 #include "url/url_parse.h"
26 using url_matcher::URLMatcher;
27 using url_matcher::URLMatcherCondition;
28 using url_matcher::URLMatcherConditionFactory;
29 using url_matcher::URLMatcherConditionSet;
30 using url_matcher::URLMatcherPortFilter;
31 using url_matcher::URLMatcherSchemeFilter;
32 using url_matcher::URLQueryElementMatcherCondition;
// Consulted by BypassBlacklistWildcardForURL(), which compares a URL's scheme
// against every entry of this array.
// NOTE(review): the elements are declared as plain `const char*`, so the
// pointers themselves are mutable; consider `const char* const` - confirm no
// caller reassigns them.
38 // List of schemes of URLs that should not be blocked by the "*" wildcard in
39 // the blacklist. Note that URLs with these schemes can still be blocked with
40 // a more specific filter e.g. "chrome-extension://*".
41 // The schemes are hardcoded here to avoid dependencies on //extensions and
43 const char* kBypassBlacklistWildcardForSchemes[] = {
44 // For internal extension URLs e.g. the Bookmark Manager and the File
45 // Manager on Chrome OS.
51 // NTP on other platforms.
55 // Maximum filters per policy. Filters over this index are ignored.
// URLBlacklist::AddFilters() enforces this by iterating over at most
// min(kMaxFiltersPerPolicy, list size) entries of the incoming list.
56 const size_t kMaxFiltersPerPolicy = 1000;
58 // A task that builds the blacklist on a background thread.
// Takes ownership of the |block| and |allow| lists, creates a fresh
// URLBlacklist bound to |segment_url|, feeds it the block filters first and the
// allow filters second, and hands ownership of the result back to the caller.
// URLBlacklistManager::UpdateOnIO() binds this function as the task it posts to
// its background task runner.
59 scoped_ptr<URLBlacklist> BuildBlacklist(
60 scoped_ptr<base::ListValue> block,
61 scoped_ptr<base::ListValue> allow,
62 URLBlacklist::SegmentURLCallback segment_url) {
63 scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url));
// Block() and Allow() both forward to AddFilters(), which assigns filter ids in
// insertion order.
64 blacklist->Block(block.get());
65 blacklist->Allow(allow.get());
66 return blacklist.Pass();
69 // Tokenise the parameter |query| and add appropriate query element matcher
70 // conditions to the |query_conditions|.
// One condition is inserted per key/value pair that ExtractQueryKeyValue()
// yields from |query|. |allow| selects the match type (see below).
71 void ProcessQueryToConditions(
72 url_matcher::URLMatcherConditionFactory* condition_factory,
73 const std::string& query,
75 std::set<URLQueryElementMatcherCondition>* query_conditions) {
// Start with the whole string as the not-yet-consumed part of the query.
76 url::Component query_left = url::MakeRange(0, query.length());
79 // Depending on the filter type being black-list or white-list, the matcher
80 // chooses any or every match. The idea is a URL should be black-listed if
81 // there is any occurrence of the key value pair. It should be white-listed
82 // only if every occurrence of the key is followed by the value. This avoids
83 // situations such as a user appending a white-listed video parameter in the
84 // end of the query and watching a video of their choice (the last parameter
85 // is ignored by some web servers like youtube's).
86 URLQueryElementMatcherCondition::Type match_type =
87 allow ? URLQueryElementMatcherCondition::MATCH_ALL
88 : URLQueryElementMatcherCondition::MATCH_ANY;
// Each iteration consumes one key/value pair from |query_left|.
90 while (ExtractQueryKeyValue(query.data(), &query_left, &key, &value)) {
// A pair with an empty value is matched on the key alone.
91 URLQueryElementMatcherCondition::QueryElementType query_element_type =
92 value.len ? URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY_VALUE
93 : URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY;
94 URLQueryElementMatcherCondition::QueryValueMatchType query_value_match_type;
// A trailing '*' on the key (when there is no value) or on the value requests
// prefix matching; anything else is matched exactly.
95 if (!value.len && key.len && query[key.end() - 1] == '*') {
97 query_value_match_type =
98 URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
99 } else if (value.len && query[value.end() - 1] == '*') {
101 query_value_match_type =
102 URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
104 query_value_match_type =
105 URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_EXACT;
// The key and value substrings are copied out of |query| for the condition.
107 query_conditions->insert(
108 URLQueryElementMatcherCondition(query.substr(key.begin, key.len),
109 query.substr(value.begin, value.len),
110 query_value_match_type,
// Checks the scheme of |url| against each entry of
// kBypassBlacklistWildcardForSchemes with a linear scan.
// URLBlacklist::IsURLBlocked() consults this only when the winning filter is
// the "*" blacklist wildcard.
// NOTE(review): presumably returns true on the first matching scheme and false
// otherwise - confirm against the full function body.
117 bool BypassBlacklistWildcardForURL(const GURL& url) {
118 const std::string& scheme = url.scheme();
119 for (size_t i = 0; i < arraysize(kBypassBlacklistWildcardForSchemes); ++i) {
120 if (scheme == kBypassBlacklistWildcardForSchemes[i])
// Parsed form of a single filter pattern. URLBlacklist::AddFilters() fills one
// instance per pattern and stores it in |filters_| keyed by filter id.
128 struct URLBlacklist::FilterComponents {
// NOTE(review): |number_of_key_value_pairs| is not in this initializer list,
// yet IsBlacklistWildcard() reads it. AddFilters() assigns it before storing
// the struct, but a default-constructed instance leaves it indeterminate;
// consider initializing it to 0 here.
129 FilterComponents() : port(0), match_subdomains(true), allow(true) {}
130 ~FilterComponents() {}
132 // Returns true if |this| represents the "*" filter in the blacklist.
// That is: a blocking filter with empty host, scheme, path and query, no port
// and no query key/value pairs.
133 bool IsBlacklistWildcard() const {
134 return !allow && host.empty() && scheme.empty() && path.empty() &&
135 query.empty() && port == 0 && number_of_key_value_pairs == 0 &&
// Set by AddFilters() to the number of query conditions in the filter's
// condition set; used as a tie-breaker in FilterTakesPrecedence().
144 int number_of_key_value_pairs;
// Whether the filter's host also matches subdomains (host-suffix matching in
// CreateConditionSet()) rather than requiring an exact host.
145 bool match_subdomains;
// Creates an empty blacklist. |segment_url| is stored and later passed to
// FilterToComponents() to split filter patterns; filter ids start at 0 and a
// new URLMatcher is allocated to hold the condition sets.
149 URLBlacklist::URLBlacklist(SegmentURLCallback segment_url)
150 : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {}
// Empty destructor body.
// NOTE(review): |url_matcher_| is created with `new` in the constructor and
// accessed via .get(); presumably it is an owning smart pointer - confirm in
// the header.
152 URLBlacklist::~URLBlacklist() {}
// Parses the string patterns in |list| and registers them with the matcher.
// |allow| is recorded on every resulting filter (true for whitelist entries,
// false for blacklist entries; see Allow() and Block()).
154 void URLBlacklist::AddFilters(bool allow,
155 const base::ListValue* list) {
156 URLMatcherConditionSet::Vector all_conditions;
// Entries beyond kMaxFiltersPerPolicy are ignored.
157 size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize());
158 for (size_t i = 0; i < size; ++i) {
160 bool success = list->GetString(i, &pattern);
162 FilterComponents components;
163 components.allow = allow;
// Split the pattern into its components; patterns that fail to parse are
// logged.
164 if (!FilterToComponents(segment_url_,
168 &components.match_subdomains,
171 &components.query)) {
172 LOG(ERROR) << "Invalid pattern " << pattern;
176 scoped_refptr<URLMatcherConditionSet> condition_set =
177 CreateConditionSet(url_matcher_.get(),
181 components.match_subdomains,
// Remember how many query conditions the filter has; FilterTakesPrecedence()
// uses this count as a tie-breaker.
186 components.number_of_key_value_pairs =
187 condition_set->query_conditions().size();
188 all_conditions.push_back(condition_set);
// IsURLBlocked() looks the components up again by matcher id.
189 filters_[id_] = components;
// Register all condition sets with the matcher in a single call.
191 url_matcher_->AddConditionSets(all_conditions);
// Adds every pattern in |filters| as a blocking filter (allow == false).
194 void URLBlacklist::Block(const base::ListValue* filters) {
195 AddFilters(false, filters);
// Adds every pattern in |filters| as an allowing filter (allow == true).
198 void URLBlacklist::Allow(const base::ListValue* filters) {
199 AddFilters(true, filters);
// Determines whether |url| is blocked by finding, among all filters whose
// condition sets match |url|, the one that takes precedence over the others.
202 bool URLBlacklist::IsURLBlocked(const GURL& url) const {
203 std::set<URLMatcherConditionSet::ID> matching_ids =
204 url_matcher_->MatchURL(url);
// |max| tracks the highest-precedence matching filter seen so far; it stays
// NULL when nothing matches.
206 const FilterComponents* max = NULL;
207 for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin();
208 id != matching_ids.end(); ++id) {
// Every id handed out by the matcher was stored in |filters_| by AddFilters().
209 std::map<int, FilterComponents>::const_iterator it = filters_.find(*id);
210 DCHECK(it != filters_.end());
211 const FilterComponents& filter = it->second;
212 if (!max || FilterTakesPrecedence(filter, *max))
220 // Some of the internal Chrome URLs are not affected by the "*" in the
221 // blacklist. Note that the "*" is the lowest priority filter possible, so
222 // any higher priority filter will be applied first.
// NOTE(review): |max| is dereferenced here, so the no-match case (max == NULL)
// must be handled before this line - confirm against the full function body.
223 if (max->IsBlacklistWildcard() && BypassBlacklistWildcardForURL(url))
// Returns the number of filters currently stored in |filters_|.
229 size_t URLBlacklist::Size() const {
230 return filters_.size();
// Splits the filter pattern |filter| into the pieces written to the output
// parameters. |segment_url| parses |filter| and returns its scheme.
// AddFilters() treats a false return value as an invalid pattern.
234 bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url,
235 const std::string& filter,
238 bool* match_subdomains,
241 std::string* query) {
// file: filters get dedicated handling: the pattern is converted to a file
// path, which becomes the path component.
244 if (segment_url(filter, &parsed) == url::kFileScheme) {
245 base::FilePath file_path;
246 if (!net::FileURLToFilePath(GURL(filter), &file_path))
249 *scheme = url::kFileScheme;
251 *match_subdomains = true;
253 // Special path when the |filter| is 'file://*'.
// In that case the path is left empty instead of using the converted path.
254 *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe();
255 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
256 // Separators have to be canonicalized on Windows.
257 std::replace(path->begin(), path->end(), '\\', '/');
// Non-file filters: the host component is checked first.
263 if (!parsed.host.is_nonempty())
// The scheme is optional and only copied when the pattern carries one.
266 if (parsed.scheme.is_nonempty())
267 scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len);
271 host->assign(filter, parsed.host.begin, parsed.host.len);
272 // Special '*' host, matches all hosts.
275 *match_subdomains = true;
276 } else if ((*host)[0] == '.') {
277 // A leading dot in the pattern syntax means that we don't want to match
280 *match_subdomains = false;
// Otherwise canonicalize the host to classify it (see |host_info.family|).
282 url::RawCanonOutputT<char> output;
283 url::CanonHostInfo host_info;
284 url::CanonicalizeHostVerbose(filter.c_str(), parsed.host, &output,
286 if (host_info.family == url::CanonHostInfo::NEUTRAL) {
287 // We want to match subdomains. Add a dot in front to make sure we only
288 // match at domain component boundaries.
290 *match_subdomains = true;
// Any other host family disables subdomain matching.
292 *match_subdomains = false;
// An explicit port must parse as an integer in the range (0, kuint16max].
296 if (parsed.port.is_nonempty()) {
298 if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len),
302 if (int_port <= 0 || int_port > kuint16max)
// Path and query are copied verbatim from the pattern when present.
310 if (parsed.path.is_nonempty())
311 path->assign(filter, parsed.path.begin, parsed.path.len);
316 if (parsed.query.is_nonempty())
317 query->assign(filter, parsed.query.begin, parsed.query.len);
// Builds the URLMatcherConditionSet for one filter from its parsed components,
// using the condition factory owned by |url_matcher|. AddFilters() passes in
// the components produced by FilterToComponents().
326 scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet(
327 URLMatcher* url_matcher,
329 const std::string& scheme,
330 const std::string& host,
331 bool match_subdomains,
333 const std::string& path,
334 const std::string& query,
336 URLMatcherConditionFactory* condition_factory =
337 url_matcher->condition_factory();
// Host and path are combined into one condition: host-suffix matching when
// subdomains should match, exact-host matching otherwise. The path is always
// matched as a prefix.
338 std::set<URLMatcherCondition> conditions;
339 conditions.insert(match_subdomains ?
340 condition_factory->CreateHostSuffixPathPrefixCondition(host, path) :
341 condition_factory->CreateHostEqualsPathPrefixCondition(host, path));
// Query conditions are only generated when the filter carries a query.
343 std::set<URLQueryElementMatcherCondition> query_conditions;
344 if (!query.empty()) {
345 ProcessQueryToConditions(
346 condition_factory, query, allow, &query_conditions);
// Optional filters. NOTE(review): presumably each stays null unless the filter
// specifies a scheme / port - confirm against the guarding conditions.
349 scoped_ptr<URLMatcherSchemeFilter> scheme_filter;
351 scheme_filter.reset(new URLMatcherSchemeFilter(scheme));
353 scoped_ptr<URLMatcherPortFilter> port_filter;
// The port filter is built from a single range created from |port|.
355 std::vector<URLMatcherPortFilter::Range> ranges;
356 ranges.push_back(URLMatcherPortFilter::CreateRange(port));
357 port_filter.reset(new URLMatcherPortFilter(ranges));
360 return new URLMatcherConditionSet(id,
363 scheme_filter.Pass(),
// Decides whether filter |lhs| takes precedence over filter |rhs|.
// IsURLBlocked() uses it to pick the winning filter among all matches.
// Criteria are checked in this order: wildcard status of |rhs|, subdomain
// matching, host length, path length, number of query key/value pairs, and
// finally the allow flag.
368 bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs,
369 const FilterComponents& rhs) {
370 // The "*" wildcard is the lowest priority filter.
371 if (rhs.IsBlacklistWildcard())
// Filters that differ in subdomain matching are decided on that alone.
// NOTE(review): presumably the exact-host filter wins - confirm the returns.
374 if (lhs.match_subdomains && !rhs.match_subdomains)
376 if (!lhs.match_subdomains && rhs.match_subdomains)
// A longer host wins.
379 size_t host_length = lhs.host.length();
380 size_t other_host_length = rhs.host.length();
381 if (host_length != other_host_length)
382 return host_length > other_host_length;
// Then a longer path wins.
384 size_t path_length = lhs.path.length();
385 size_t other_path_length = rhs.path.length();
386 if (path_length != other_path_length)
387 return path_length > other_path_length;
// Then the filter with more query key/value pairs wins.
389 if (lhs.number_of_key_value_pairs != rhs.number_of_key_value_pairs)
390 return lhs.number_of_key_value_pairs > rhs.number_of_key_value_pairs;
// Final tie-breaker: an allow filter against a block filter.
392 if (lhs.allow && !rhs.allow)
// Sets up the manager: stores the task runners and callbacks, starts with an
// empty URLBlacklist, and registers for changes to the blacklist and whitelist
// prefs. The task runner that is current at construction time is recorded as
// |ui_task_runner_|.
398 URLBlacklistManager::URLBlacklistManager(
399 PrefService* pref_service,
400 const scoped_refptr<base::SequencedTaskRunner>& background_task_runner,
401 const scoped_refptr<base::SequencedTaskRunner>& io_task_runner,
402 URLBlacklist::SegmentURLCallback segment_url,
403 OverrideBlacklistCallback override_blacklist)
404 : pref_service_(pref_service),
405 background_task_runner_(background_task_runner),
406 io_task_runner_(io_task_runner),
407 segment_url_(segment_url),
408 override_blacklist_(override_blacklist),
409 ui_task_runner_(base::MessageLoopProxy::current()),
410 blacklist_(new URLBlacklist(segment_url)),
411 ui_weak_ptr_factory_(this),
412 io_weak_ptr_factory_(this) {
413 pref_change_registrar_.Init(pref_service_);
// The pref callback is bound with an unretained |this|. The registrar is a
// member and ShutdownOnUIThread() removes all of its observers.
414 base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate,
415 base::Unretained(this));
// A change to either pref schedules a rebuild of the blacklist.
416 pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback);
417 pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback);
419 // Start enforcing the policies without a delay when they are present at
421 if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist))
// Must be called on the UI task runner. Drops any update task still pending on
// the UI side and unregisters the pref observers added by the constructor.
425 void URLBlacklistManager::ShutdownOnUIThread() {
426 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
427 // Cancel any pending updates, and stop listening for pref change updates.
428 ui_weak_ptr_factory_.InvalidateWeakPtrs();
429 pref_change_registrar_.RemoveAll();
// Destructor.
// NOTE(review): UI-side teardown (weak pointer invalidation, pref observer
// removal) lives in ShutdownOnUIThread(); presumably that must run before
// destruction - confirm with the owner of this object.
432 URLBlacklistManager::~URLBlacklistManager() {
// Must be called on the UI task runner. Posts a single Update() task to the UI
// task runner, replacing any update that is still pending. Registered as the
// change callback for the blacklist and whitelist prefs.
435 void URLBlacklistManager::ScheduleUpdate() {
436 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
437 // Cancel pending updates, if any. This can happen if two preferences that
438 // change the blacklist are updated in one message loop cycle. In those cases,
439 // only rebuild the blacklist after all the preference updates are processed.
440 ui_weak_ptr_factory_.InvalidateWeakPtrs();
// The task is bound to a fresh weak pointer, so a later InvalidateWeakPtrs()
// call (from here or ShutdownOnUIThread()) cancels it.
441 ui_task_runner_->PostTask(
443 base::Bind(&URLBlacklistManager::Update,
444 ui_weak_ptr_factory_.GetWeakPtr()));
// Must be called on the UI task runner. Takes deep copies of the current
// blacklist and whitelist prefs and forwards them to UpdateOnIO() on the IO
// task runner.
447 void URLBlacklistManager::Update() {
448 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
450 // The preferences can only be read on the UI thread.
// Deep copies are taken so that ownership of the lists can be handed to the
// posted task.
451 scoped_ptr<base::ListValue> block(
452 pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy());
453 scoped_ptr<base::ListValue> allow(
454 pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy());
456 // Go through the IO thread to grab a WeakPtr to |this|. This is safe from
457 // here, since this task will always execute before a potential deletion of
458 // ProfileIOData on IO.
// Note the raw (unretained) |this| below: the safety of this call rests on the
// ordering guarantee described above.
459 io_task_runner_->PostTask(FROM_HERE,
460 base::Bind(&URLBlacklistManager::UpdateOnIO,
461 base::Unretained(this),
462 base::Passed(&block),
463 base::Passed(&allow)));
// Must be called on the IO task runner. Takes ownership of the |block| and
// |allow| lists, posts BuildBlacklist() to the background task runner, and has
// the result delivered to SetBlacklist().
466 void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block,
467 scoped_ptr<base::ListValue> allow) {
468 DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
469 // The URLBlacklist is built on a worker thread. Once it's ready, it is passed
470 // to the URLBlacklistManager on IO.
// The reply is bound to an IO-side weak pointer, so it is dropped if those weak
// pointers are invalidated before the build finishes.
471 base::PostTaskAndReplyWithResult(
472 background_task_runner_.get(),
474 base::Bind(&BuildBlacklist,
475 base::Passed(&block),
476 base::Passed(&allow),
478 base::Bind(&URLBlacklistManager::SetBlacklist,
479 io_weak_ptr_factory_.GetWeakPtr()));
// Must be called on the IO task runner. Replaces the active blacklist with the
// newly built |blacklist|, taking ownership of it.
482 void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) {
483 DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
484 blacklist_ = blacklist.Pass();
// Must be called on the IO task runner. Returns whether the currently active
// blacklist blocks |url|.
487 bool URLBlacklistManager::IsURLBlocked(const GURL& url) const {
488 DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
489 return blacklist_->IsURLBlocked(url);
// Must be called on the IO task runner. Decides whether |request| should be
// blocked and writes a net error code to |reason|.
492 bool URLBlacklistManager::IsRequestBlocked(
493 const net::URLRequest& request, int* reason) const {
494 DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
496 // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283
// Requests carrying neither the main-frame nor the sub-frame load flag are
// singled out here. NOTE(review): presumably they are never blocked - confirm.
497 int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME;
498 if ((request.load_flags() & filter_flags) == 0)
// The embedder-supplied override callback is consulted before the blacklist.
503 if (override_blacklist_.Run(request.url(), &block, reason))
// NOTE(review): |*reason| is assigned before the blacklist lookup, so it is set
// even when IsURLBlocked() returns false; callers should only read it when this
// function returns true.
506 *reason = net::ERR_BLOCKED_BY_ADMINISTRATOR;
507 return IsURLBlocked(request.url());
// Registers the URL blacklist and whitelist list prefs with |registry|. Both
// are registered as unsyncable.
511 void URLBlacklistManager::RegisterProfilePrefs(
512 user_prefs::PrefRegistrySyncable* registry) {
513 registry->RegisterListPref(policy_prefs::kUrlBlacklist,
514 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
515 registry->RegisterListPref(policy_prefs::kUrlWhitelist,
516 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
519 } // namespace policy