1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "extensions/common/matcher/url_matcher.h"
10 #include "base/logging.h"
11 #include "content/public/common/url_constants.h"
13 #include "url/url_canon.h"
15 namespace extensions {
17 // This set of classes implement a mapping of URL Component Patterns, such as
18 // host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns
19 // for use in substring comparisons.
21 // The idea of this mapping is to reduce the problem of comparing many
22 // URL Component Patterns against one URL to the problem of searching many
23 // substrings in one string:
25 // ---------------------- -----------------
26 // | URL Query operator | ----translate----> | StringPattern |
27 // ---------------------- -----------------
33 // ---------------------- -----------------
34 // | URL to compare | | |
35 // | to all URL Query | ----translate----> | String |
37 // ---------------------- -----------------
39 // The reason for this problem reduction is that there are efficient algorithms
40 // for searching many substrings in one string (see Aho-Corasick algorithm).
42 // Additionally, some of the same pieces are reused to implement regular
43 // expression comparisons. The FilteredRE2 implementation for matching many
44 // regular expressions against one string uses prefiltering, in which a set
45 // of substrings (derived from the regexes) are first searched for, to reduce
46 // the number of regular expressions to test; the prefiltering step also
49 // Case 1: {host,path,query}_{prefix,suffix,equals} searches.
50 // ==========================================================
52 // For searches in this class, we normalize URLs as follows:
55 // Remove scheme, port and segment from URL:
56 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes
57 // www.example.com/index.html?search=foo
59 // We remove the scheme and port number because they can be checked later
60 // in a secondary filter step. We remove the segment (the #... part) because
61 // this is not guaranteed to be ASCII-7 encoded.
64 // Translate URL to String and add the following position markers:
65 // - BU = Beginning of URL
66 // - ED = End of Domain
69 // Furthermore, the hostname is canonicalized to start with a ".".
71 // Position markers are represented as characters >127, which are therefore
72 // guaranteed not to be part of the ASCII-7 encoded URL character set.
74 // -> www.example.com/index.html?search=foo becomes
75 // BU .www.example.com ED /index.html EP ?search=foo EU
77 // -> www.example.com/index.html becomes
78 // BU .www.example.com ED /index.html EP EU
81 // Translate URL Component Patterns as follows:
83 // host_prefix(prefix) = BU add_missing_dot_prefix(prefix)
84 // -> host_prefix("www.example") = BU .www.example
86 // host_suffix(suffix) = suffix ED
87 // -> host_suffix("example.com") = example.com ED
88 // -> host_suffix(".example.com") = .example.com ED
90 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED
91 // -> host_equals("www.example.com") = BU .www.example.com ED
93 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}).
95 // With this, we can search the StringPatterns in the normalized URL.
98 // Case 2: url_{prefix,suffix,equals,contains} searches.
99 // =====================================================
101 // Step 1: as above, except that
102 // - the scheme is not removed
103 // - the port is not removed if it is specified and does not match the default
104 // port for the given scheme.
107 // Translate URL to String and add the following position markers:
108 // - BU = Beginning of URL
111 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes
112 // BU http://www.example.com:8080/index.html?search=foo EU
113 // -> http://www.example.com:80/index.html?search=foo#first_match becomes
114 // BU http://www.example.com/index.html?search=foo EU
116 // url_prefix(prefix) = BU prefix
117 // -> url_prefix("http://www.example") = BU http://www.example
119 // url_contains(substring) = substring
120 // -> url_contains("index") = index
123 // Case 3: {host,path,query}_contains searches.
124 // ============================================
126 // These kinds of searches are not supported directly but can be derived
127 // by a combination of a url_contains() query followed by an explicit test:
129 // host_contains(str) = url_contains(str) followed by test whether str occurs
130 // in host component of original URL.
131 // -> host_contains("example.co") = example.co
132 // followed by gurl.host().find("example.co");
134 // [similarly for path_contains and query_contains].
137 // Regular expression matching (url_matches searches)
138 // ==================================================
140 // This class also supports matching regular expressions (RE2 syntax)
141 // against full URLs, which are transformed as in case 2.
145 bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) {
146 return criterion == URLMatcherCondition::URL_MATCHES;
149 bool IsOriginAndPathRegexCriterion(URLMatcherCondition::Criterion criterion) {
150 return criterion == URLMatcherCondition::ORIGIN_AND_PATH_MATCHES;
156 // URLMatcherCondition
159 URLMatcherCondition::URLMatcherCondition()
160 : criterion_(HOST_PREFIX),
161 string_pattern_(NULL) {}
163 URLMatcherCondition::~URLMatcherCondition() {}
165 URLMatcherCondition::URLMatcherCondition(
167 const StringPattern* string_pattern)
168 : criterion_(criterion),
169 string_pattern_(string_pattern) {}
171 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs)
172 : criterion_(rhs.criterion_),
173 string_pattern_(rhs.string_pattern_) {}
175 URLMatcherCondition& URLMatcherCondition::operator=(
176 const URLMatcherCondition& rhs) {
177 criterion_ = rhs.criterion_;
178 string_pattern_ = rhs.string_pattern_;
182 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const {
183 if (criterion_ < rhs.criterion_) return true;
184 if (criterion_ > rhs.criterion_) return false;
185 if (string_pattern_ != NULL && rhs.string_pattern_ != NULL)
186 return *string_pattern_ < *rhs.string_pattern_;
187 if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true;
188 // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL,
193 bool URLMatcherCondition::IsFullURLCondition() const {
194 // For these criteria the SubstringMatcher needs to be executed on the
195 // GURL that is canonicalized with
196 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches.
197 switch (criterion_) {
212 bool URLMatcherCondition::IsRegexCondition() const {
213 return IsRegexCriterion(criterion_);
216 bool URLMatcherCondition::IsOriginAndPathRegexCondition() const {
217 return IsOriginAndPathRegexCriterion(criterion_);
220 bool URLMatcherCondition::IsMatch(
221 const std::set<StringPattern::ID>& matching_patterns,
222 const GURL& url) const {
223 DCHECK(string_pattern_);
224 if (!ContainsKey(matching_patterns, string_pattern_->id()))
226 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on
227 // a substring match on the raw URL. In case of a match, we need to verify
228 // that the match was found in the correct component of the URL.
229 switch (criterion_) {
231 return url.host().find(string_pattern_->pattern()) !=
234 return url.path().find(string_pattern_->pattern()) !=
237 return url.query().find(string_pattern_->pattern()) !=
246 // URLMatcherConditionFactory
250 // These are symbols that are not contained in 7-bit ASCII used in GURLs.
251 const char kBeginningOfURL[] = {static_cast<char>(-1), 0};
252 const char kEndOfDomain[] = {static_cast<char>(-2), 0};
253 const char kEndOfPath[] = {static_cast<char>(-3), 0};
254 const char kEndOfURL[] = {static_cast<char>(-4), 0};
257 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {}
259 URLMatcherConditionFactory::~URLMatcherConditionFactory() {
260 STLDeleteElements(&substring_pattern_singletons_);
261 STLDeleteElements(®ex_pattern_singletons_);
262 STLDeleteElements(&origin_and_path_regex_pattern_singletons_);
265 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches(
266 const GURL& url) const {
267 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain +
268 url.path() + kEndOfPath +
269 (url.has_query() ? "?" + url.query() : std::string()) + kEndOfURL;
272 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition(
273 const std::string& prefix) {
274 return CreateCondition(URLMatcherCondition::HOST_PREFIX,
275 kBeginningOfURL + CanonicalizeHostname(prefix));
278 URLMatcherCondition URLMatcherConditionFactory::CreateHostSuffixCondition(
279 const std::string& suffix) {
280 return CreateCondition(URLMatcherCondition::HOST_SUFFIX,
281 suffix + kEndOfDomain);
284 URLMatcherCondition URLMatcherConditionFactory::CreateHostContainsCondition(
285 const std::string& str) {
286 return CreateCondition(URLMatcherCondition::HOST_CONTAINS, str);
289 URLMatcherCondition URLMatcherConditionFactory::CreateHostEqualsCondition(
290 const std::string& str) {
291 return CreateCondition(URLMatcherCondition::HOST_EQUALS,
292 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfDomain);
295 URLMatcherCondition URLMatcherConditionFactory::CreatePathPrefixCondition(
296 const std::string& prefix) {
297 return CreateCondition(URLMatcherCondition::PATH_PREFIX,
298 kEndOfDomain + prefix);
301 URLMatcherCondition URLMatcherConditionFactory::CreatePathSuffixCondition(
302 const std::string& suffix) {
303 return CreateCondition(URLMatcherCondition::PATH_SUFFIX, suffix + kEndOfPath);
306 URLMatcherCondition URLMatcherConditionFactory::CreatePathContainsCondition(
307 const std::string& str) {
308 return CreateCondition(URLMatcherCondition::PATH_CONTAINS, str);
311 URLMatcherCondition URLMatcherConditionFactory::CreatePathEqualsCondition(
312 const std::string& str) {
313 return CreateCondition(URLMatcherCondition::PATH_EQUALS,
314 kEndOfDomain + str + kEndOfPath);
317 URLMatcherCondition URLMatcherConditionFactory::CreateQueryPrefixCondition(
318 const std::string& prefix) {
320 if (!prefix.empty() && prefix[0] == '?')
321 pattern = kEndOfPath + prefix;
323 pattern = kEndOfPath + ('?' + prefix);
325 return CreateCondition(URLMatcherCondition::QUERY_PREFIX, pattern);
328 URLMatcherCondition URLMatcherConditionFactory::CreateQuerySuffixCondition(
329 const std::string& suffix) {
330 if (!suffix.empty() && suffix[0] == '?') {
331 return CreateQueryEqualsCondition(suffix);
333 return CreateCondition(URLMatcherCondition::QUERY_SUFFIX,
338 URLMatcherCondition URLMatcherConditionFactory::CreateQueryContainsCondition(
339 const std::string& str) {
340 if (!str.empty() && str[0] == '?')
341 return CreateQueryPrefixCondition(str);
343 return CreateCondition(URLMatcherCondition::QUERY_CONTAINS, str);
346 URLMatcherCondition URLMatcherConditionFactory::CreateQueryEqualsCondition(
347 const std::string& str) {
349 if (!str.empty() && str[0] == '?')
350 pattern = kEndOfPath + str + kEndOfURL;
352 pattern = kEndOfPath + ('?' + str) + kEndOfURL;
354 return CreateCondition(URLMatcherCondition::QUERY_EQUALS, pattern);
358 URLMatcherConditionFactory::CreateHostSuffixPathPrefixCondition(
359 const std::string& host_suffix,
360 const std::string& path_prefix) {
361 return CreateCondition(URLMatcherCondition::HOST_SUFFIX_PATH_PREFIX,
362 host_suffix + kEndOfDomain + path_prefix);
366 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition(
367 const std::string& host,
368 const std::string& path_prefix) {
369 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX,
370 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain +
374 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches(
375 const GURL& url) const {
376 GURL::Replacements replacements;
377 replacements.ClearPassword();
378 replacements.ClearUsername();
379 replacements.ClearRef();
380 // Clear port if it is implicit from scheme.
381 if (url.has_port()) {
382 const std::string& port = url.scheme();
383 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
384 url.EffectiveIntPort()) {
385 replacements.ClearPort();
388 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() +
392 static std::string CanonicalizeURLForRegexSearchesHelper(
395 GURL::Replacements replacements;
396 replacements.ClearPassword();
397 replacements.ClearUsername();
398 replacements.ClearRef();
400 replacements.ClearQuery();
401 // Clear port if it is implicit from scheme.
402 if (url.has_port()) {
403 const std::string& port = url.scheme();
404 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
405 url.EffectiveIntPort()) {
406 replacements.ClearPort();
409 return url.ReplaceComponents(replacements).spec();
412 std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches(
413 const GURL& url) const {
414 return CanonicalizeURLForRegexSearchesHelper(url, false);
418 URLMatcherConditionFactory::CanonicalizeURLForOriginAndPathRegexSearches(
419 const GURL& url) const {
420 return CanonicalizeURLForRegexSearchesHelper(url, true);
423 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
424 const std::string& prefix) {
425 return CreateCondition(URLMatcherCondition::URL_PREFIX,
426 kBeginningOfURL + prefix);
429 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition(
430 const std::string& suffix) {
431 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL);
434 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition(
435 const std::string& str) {
436 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str);
439 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition(
440 const std::string& str) {
441 return CreateCondition(URLMatcherCondition::URL_EQUALS,
442 kBeginningOfURL + str + kEndOfURL);
445 URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition(
446 const std::string& regex) {
447 return CreateCondition(URLMatcherCondition::URL_MATCHES, regex);
451 URLMatcherConditionFactory::CreateOriginAndPathMatchesCondition(
452 const std::string& regex) {
453 return CreateCondition(URLMatcherCondition::ORIGIN_AND_PATH_MATCHES, regex);
456 void URLMatcherConditionFactory::ForgetUnusedPatterns(
457 const std::set<StringPattern::ID>& used_patterns) {
458 PatternSingletons::iterator i = substring_pattern_singletons_.begin();
459 while (i != substring_pattern_singletons_.end()) {
460 if (ContainsKey(used_patterns, (*i)->id())) {
464 substring_pattern_singletons_.erase(i++);
467 i = regex_pattern_singletons_.begin();
468 while (i != regex_pattern_singletons_.end()) {
469 if (ContainsKey(used_patterns, (*i)->id())) {
473 regex_pattern_singletons_.erase(i++);
476 i = origin_and_path_regex_pattern_singletons_.begin();
477 while (i != origin_and_path_regex_pattern_singletons_.end()) {
478 if (ContainsKey(used_patterns, (*i)->id())) {
482 origin_and_path_regex_pattern_singletons_.erase(i++);
487 bool URLMatcherConditionFactory::IsEmpty() const {
488 return substring_pattern_singletons_.empty() &&
489 regex_pattern_singletons_.empty() &&
490 origin_and_path_regex_pattern_singletons_.empty();
493 URLMatcherCondition URLMatcherConditionFactory::CreateCondition(
494 URLMatcherCondition::Criterion criterion,
495 const std::string& pattern) {
496 StringPattern search_pattern(pattern, 0);
497 PatternSingletons* pattern_singletons = NULL;
498 if (IsRegexCriterion(criterion))
499 pattern_singletons = ®ex_pattern_singletons_;
500 else if (IsOriginAndPathRegexCriterion(criterion))
501 pattern_singletons = &origin_and_path_regex_pattern_singletons_;
503 pattern_singletons = &substring_pattern_singletons_;
505 PatternSingletons::const_iterator iter =
506 pattern_singletons->find(&search_pattern);
508 if (iter != pattern_singletons->end()) {
509 return URLMatcherCondition(criterion, *iter);
511 StringPattern* new_pattern =
512 new StringPattern(pattern, id_counter_++);
513 pattern_singletons->insert(new_pattern);
514 return URLMatcherCondition(criterion, new_pattern);
518 std::string URLMatcherConditionFactory::CanonicalizeHostname(
519 const std::string& hostname) const {
520 if (!hostname.empty() && hostname[0] == '.')
523 return "." + hostname;
526 bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()(
528 StringPattern* rhs) const {
529 if (lhs == NULL && rhs != NULL) return true;
530 if (lhs != NULL && rhs != NULL)
531 return lhs->pattern() < rhs->pattern();
532 // Either both are NULL or only rhs is NULL.
537 // URLMatcherSchemeFilter
540 URLMatcherSchemeFilter::URLMatcherSchemeFilter(const std::string& filter)
542 filters_.push_back(filter);
545 URLMatcherSchemeFilter::URLMatcherSchemeFilter(
546 const std::vector<std::string>& filters)
547 : filters_(filters) {}
549 URLMatcherSchemeFilter::~URLMatcherSchemeFilter() {}
551 bool URLMatcherSchemeFilter::IsMatch(const GURL& url) const {
552 return std::find(filters_.begin(), filters_.end(), url.scheme()) !=
557 // URLMatcherPortFilter
560 URLMatcherPortFilter::URLMatcherPortFilter(
561 const std::vector<URLMatcherPortFilter::Range>& ranges)
564 URLMatcherPortFilter::~URLMatcherPortFilter() {}
566 bool URLMatcherPortFilter::IsMatch(const GURL& url) const {
567 int port = url.EffectiveIntPort();
568 for (std::vector<Range>::const_iterator i = ranges_.begin();
569 i != ranges_.end(); ++i) {
570 if (i->first <= port && port <= i->second)
577 URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int from,
579 return Range(from, to);
583 URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int port) {
584 return Range(port, port);
588 // URLMatcherConditionSet
591 URLMatcherConditionSet::~URLMatcherConditionSet() {}
593 URLMatcherConditionSet::URLMatcherConditionSet(
595 const Conditions& conditions)
597 conditions_(conditions) {}
599 URLMatcherConditionSet::URLMatcherConditionSet(
601 const Conditions& conditions,
602 scoped_ptr<URLMatcherSchemeFilter> scheme_filter,
603 scoped_ptr<URLMatcherPortFilter> port_filter)
605 conditions_(conditions),
606 scheme_filter_(scheme_filter.Pass()),
607 port_filter_(port_filter.Pass()) {}
609 bool URLMatcherConditionSet::IsMatch(
610 const std::set<StringPattern::ID>& matching_patterns,
611 const GURL& url) const {
612 for (Conditions::const_iterator i = conditions_.begin();
613 i != conditions_.end(); ++i) {
614 if (!i->IsMatch(matching_patterns, url))
617 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url))
619 if (port_filter_.get() && !port_filter_->IsMatch(url))
628 URLMatcher::URLMatcher() {}
630 URLMatcher::~URLMatcher() {}
632 void URLMatcher::AddConditionSets(
633 const URLMatcherConditionSet::Vector& condition_sets) {
634 for (URLMatcherConditionSet::Vector::const_iterator i =
635 condition_sets.begin(); i != condition_sets.end(); ++i) {
636 DCHECK(url_matcher_condition_sets_.find((*i)->id()) ==
637 url_matcher_condition_sets_.end());
638 url_matcher_condition_sets_[(*i)->id()] = *i;
640 UpdateInternalDatastructures();
643 void URLMatcher::RemoveConditionSets(
644 const std::vector<URLMatcherConditionSet::ID>& condition_set_ids) {
645 for (std::vector<URLMatcherConditionSet::ID>::const_iterator i =
646 condition_set_ids.begin(); i != condition_set_ids.end(); ++i) {
647 DCHECK(url_matcher_condition_sets_.find(*i) !=
648 url_matcher_condition_sets_.end());
649 url_matcher_condition_sets_.erase(*i);
651 UpdateInternalDatastructures();
654 void URLMatcher::ClearUnusedConditionSets() {
655 UpdateConditionFactory();
658 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(
659 const GURL& url) const {
660 // Find all IDs of StringPatterns that match |url|.
661 // See URLMatcherConditionFactory for the canonicalization of URLs and the
662 // distinction between full url searches and url component searches.
663 std::set<StringPattern::ID> matches;
664 if (!full_url_matcher_.IsEmpty()) {
665 full_url_matcher_.Match(
666 condition_factory_.CanonicalizeURLForFullSearches(url), &matches);
668 if (!url_component_matcher_.IsEmpty()) {
669 url_component_matcher_.Match(
670 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches);
672 if (!regex_set_matcher_.IsEmpty()) {
673 regex_set_matcher_.Match(
674 condition_factory_.CanonicalizeURLForRegexSearches(url), &matches);
676 if (!origin_and_path_regex_set_matcher_.IsEmpty()) {
677 origin_and_path_regex_set_matcher_.Match(
678 condition_factory_.CanonicalizeURLForOriginAndPathRegexSearches(url),
682 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions
684 std::set<URLMatcherConditionSet::ID> result;
685 for (std::set<StringPattern::ID>::const_iterator i = matches.begin();
686 i != matches.end(); ++i) {
687 // For each URLMatcherConditionSet there is exactly one condition
688 // registered in substring_match_triggers_. This means that the following
689 // logic tests each URLMatcherConditionSet exactly once if it can be
690 // completely fulfilled.
691 StringPatternTriggers::const_iterator triggered_condition_sets_iter =
692 substring_match_triggers_.find(*i);
693 if (triggered_condition_sets_iter == substring_match_triggers_.end())
694 continue; // Not all substring matches are triggers for a condition set.
695 const std::set<URLMatcherConditionSet::ID>& condition_sets =
696 triggered_condition_sets_iter->second;
697 for (std::set<URLMatcherConditionSet::ID>::const_iterator j =
698 condition_sets.begin(); j != condition_sets.end(); ++j) {
699 URLMatcherConditionSets::const_iterator condition_set_iter =
700 url_matcher_condition_sets_.find(*j);
701 DCHECK(condition_set_iter != url_matcher_condition_sets_.end());
702 if (condition_set_iter->second->IsMatch(matches, url))
710 bool URLMatcher::IsEmpty() const {
711 return condition_factory_.IsEmpty() &&
712 url_matcher_condition_sets_.empty() &&
713 substring_match_triggers_.empty() &&
714 full_url_matcher_.IsEmpty() &&
715 url_component_matcher_.IsEmpty() &&
716 regex_set_matcher_.IsEmpty() &&
717 origin_and_path_regex_set_matcher_.IsEmpty() &&
718 registered_full_url_patterns_.empty() &&
719 registered_url_component_patterns_.empty();
722 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) {
723 // The purpose of |full_url_conditions| is just that we need to execute
724 // the same logic once for Full URL searches and once for URL Component
725 // searches (see URLMatcherConditionFactory).
727 // Determine which patterns need to be registered when this function
729 std::set<const StringPattern*> new_patterns;
730 for (URLMatcherConditionSets::const_iterator condition_set_iter =
731 url_matcher_condition_sets_.begin();
732 condition_set_iter != url_matcher_condition_sets_.end();
733 ++condition_set_iter) {
734 const URLMatcherConditionSet::Conditions& conditions =
735 condition_set_iter->second->conditions();
736 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
737 conditions.begin(); condition_iter != conditions.end();
739 // If we are called to process Full URL searches, ignore others, and
740 // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.)
741 if (!condition_iter->IsRegexCondition() &&
742 !condition_iter->IsOriginAndPathRegexCondition() &&
743 full_url_conditions == condition_iter->IsFullURLCondition())
744 new_patterns.insert(condition_iter->string_pattern());
748 // This is the set of patterns that were registered before this function
750 std::set<const StringPattern*>& registered_patterns =
751 full_url_conditions ? registered_full_url_patterns_
752 : registered_url_component_patterns_;
754 // Add all patterns that are in new_patterns but not in registered_patterns.
755 std::vector<const StringPattern*> patterns_to_register =
756 base::STLSetDifference<std::vector<const StringPattern*> >(
757 new_patterns, registered_patterns);
759 // Remove all patterns that are in registered_patterns but not in
761 std::vector<const StringPattern*> patterns_to_unregister =
762 base::STLSetDifference<std::vector<const StringPattern*> >(
763 registered_patterns, new_patterns);
765 // Update the SubstringSetMatcher.
766 SubstringSetMatcher& url_matcher =
767 full_url_conditions ? full_url_matcher_ : url_component_matcher_;
768 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register,
769 patterns_to_unregister);
771 // Update the set of registered_patterns for the next time this function
773 registered_patterns.swap(new_patterns);
776 void URLMatcher::UpdateRegexSetMatcher() {
777 std::vector<const StringPattern*> new_patterns;
778 std::vector<const StringPattern*> new_origin_and_path_patterns;
780 for (URLMatcherConditionSets::const_iterator condition_set_iter =
781 url_matcher_condition_sets_.begin();
782 condition_set_iter != url_matcher_condition_sets_.end();
783 ++condition_set_iter) {
784 const URLMatcherConditionSet::Conditions& conditions =
785 condition_set_iter->second->conditions();
786 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
787 conditions.begin(); condition_iter != conditions.end();
789 if (condition_iter->IsRegexCondition()) {
790 new_patterns.push_back(condition_iter->string_pattern());
791 } else if (condition_iter->IsOriginAndPathRegexCondition()) {
792 new_origin_and_path_patterns.push_back(
793 condition_iter->string_pattern());
798 // Start over from scratch. We can't really do better than this, since the
799 // FilteredRE2 backend doesn't support incremental updates.
800 regex_set_matcher_.ClearPatterns();
801 regex_set_matcher_.AddPatterns(new_patterns);
802 origin_and_path_regex_set_matcher_.ClearPatterns();
803 origin_and_path_regex_set_matcher_.AddPatterns(new_origin_and_path_patterns);
806 void URLMatcher::UpdateTriggers() {
807 // Count substring pattern frequencies.
808 std::map<StringPattern::ID, size_t> substring_pattern_frequencies;
809 for (URLMatcherConditionSets::const_iterator condition_set_iter =
810 url_matcher_condition_sets_.begin();
811 condition_set_iter != url_matcher_condition_sets_.end();
812 ++condition_set_iter) {
813 const URLMatcherConditionSet::Conditions& conditions =
814 condition_set_iter->second->conditions();
815 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
816 conditions.begin(); condition_iter != conditions.end();
818 const StringPattern* pattern = condition_iter->string_pattern();
819 substring_pattern_frequencies[pattern->id()]++;
823 // Update trigger conditions: Determine for each URLMatcherConditionSet which
824 // URLMatcherCondition contains a StringPattern that occurs least
825 // frequently in this URLMatcher. We assume that this condition is very
826 // specific and occurs rarely in URLs. If a match occurs for this
827 // URLMatcherCondition, we want to test all other URLMatcherCondition in the
828 // respective URLMatcherConditionSet as well to see whether the entire
829 // URLMatcherConditionSet is considered matching.
830 substring_match_triggers_.clear();
831 for (URLMatcherConditionSets::const_iterator condition_set_iter =
832 url_matcher_condition_sets_.begin();
833 condition_set_iter != url_matcher_condition_sets_.end();
834 ++condition_set_iter) {
835 const URLMatcherConditionSet::Conditions& conditions =
836 condition_set_iter->second->conditions();
837 if (conditions.empty())
839 URLMatcherConditionSet::Conditions::const_iterator condition_iter =
841 StringPattern::ID trigger = condition_iter->string_pattern()->id();
842 // We skip the first element in the following loop.
844 for (; condition_iter != conditions.end(); ++condition_iter) {
845 StringPattern::ID current_id =
846 condition_iter->string_pattern()->id();
847 if (substring_pattern_frequencies[trigger] >
848 substring_pattern_frequencies[current_id]) {
849 trigger = current_id;
852 substring_match_triggers_[trigger].insert(condition_set_iter->second->id());
856 void URLMatcher::UpdateConditionFactory() {
857 std::set<StringPattern::ID> used_patterns;
858 for (URLMatcherConditionSets::const_iterator condition_set_iter =
859 url_matcher_condition_sets_.begin();
860 condition_set_iter != url_matcher_condition_sets_.end();
861 ++condition_set_iter) {
862 const URLMatcherConditionSet::Conditions& conditions =
863 condition_set_iter->second->conditions();
864 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
865 conditions.begin(); condition_iter != conditions.end();
867 used_patterns.insert(condition_iter->string_pattern()->id());
870 condition_factory_.ForgetUnusedPatterns(used_patterns);
873 void URLMatcher::UpdateInternalDatastructures() {
874 UpdateSubstringSetMatcher(false);
875 UpdateSubstringSetMatcher(true);
876 UpdateRegexSetMatcher();
878 UpdateConditionFactory();
881 } // namespace extensions