1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "net/base/sdch_observer.h"
15 #include "net/url_request/url_request_http_job.h"
// Shortens the host of |*gurl| by one character and writes the rewritten URL
// back through the pointer. The guard below tests whether the host's last
// character is '.'.
// NOTE(review): the statement controlled by that guard is not visible in this
// view - presumably an early return, so that only hosts ending in '.' are
// modified. Confirm. `*host.rbegin()` also presumes a non-empty host.
19 void StripTrailingDot(GURL* gurl) {
20 std::string host(gurl->host());
25 if (*host.rbegin() != '.')
28 host.resize(host.size() - 1);
// Only the host component is set on |replacements| before it is applied.
30 GURL::Replacements replacements;
31 replacements.SetHostStr(host);
32 *gurl = gurl->ReplaceComponents(replacements);
40 //------------------------------------------------------------------------------
43 // Adjust SDCH limits downwards for mobile.
// Caps on stored dictionaries: kMaxDictionaryCount is compared against
// dictionaries_.size() and kMaxDictionarySize against the size of the
// dictionary text, both in AddSdchDictionary().
// NOTE(review): the preprocessor lines separating the two pairs of definitions
// are not visible in this view; presumably the first pair is the Android/iOS
// branch and the second pair is the fallback for other platforms. Confirm.
44 #if defined(OS_ANDROID) || defined(OS_IOS)
46 const size_t SdchManager::kMaxDictionaryCount = 1;
47 const size_t SdchManager::kMaxDictionarySize = 500 * 1000;
50 const size_t SdchManager::kMaxDictionaryCount = 20;
51 const size_t SdchManager::kMaxDictionarySize = 1000 * 1000;
// Static switches, both initially true; written by EnableSdchSupport() and
// EnableSecureSchemeSupport() respectively.
55 bool SdchManager::g_sdch_enabled_ = true;
58 bool SdchManager::g_secure_scheme_supported_ = true;
60 //------------------------------------------------------------------------------
// Constructs a Dictionary. |text_| is built from |dictionary_text| starting at
// position |offset|, so it excludes whatever precedes that offset;
// |client_hash| and |expiration| are stored as given.
// NOTE(review): some parameters (including |offset|) and several member
// initializers are not visible in this view; see the call site in
// AddSdchDictionary() for the full argument order.
61 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
63 const std::string& client_hash,
65 const std::string& domain,
66 const std::string& path,
67 const base::Time& expiration,
68 const std::set<int>& ports)
69 : text_(dictionary_text, offset),
70 client_hash_(client_hash),
74 expiration_(expiration),
// Destructor for Dictionary (its body is not visible in this view).
78 SdchManager::Dictionary::~Dictionary() {
// Decides whether this dictionary may be advertised for a request to
// |target_url|. The checks run in this order: domain match against domain_;
// port membership (only when ports_ is non-empty); path match (only when path_
// is non-empty); rejection of secure targets while secure-scheme support is
// off; agreement between the secure-ness of |target_url| and of url_; and
// finally expiration_ compared with the current time.
// NOTE(review): the statements executed when a check fails are not visible in
// this view - presumably each returns false and the function returns true
// otherwise. Confirm.
81 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
82 /* The specific rules of when a dictionary should be advertised in an
83 Avail-Dictionary header are modeled after the rules for cookie scoping. The
84 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
85 dictionary may be advertised in the Avail-Dictionaries header exactly when
86 all of the following are true:
87 1. The server's effective host name domain-matches the Domain attribute of
89 2. If the dictionary has a Port attribute, the request port is one of the
90 ports listed in the Port attribute.
91 3. The request URI path-matches the path header of the dictionary.
92 4. The request is not an HTTPS request.
93 We can override (ignore) item (4) only when we have explicitly enabled
94 HTTPS support AND the dictionary acquisition scheme matches the target
97 if (!DomainMatch(target_url, domain_))
99 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
101 if (path_.size() && !PathMatch(target_url.path(), path_))
103 if (!SdchManager::secure_scheme_supported() && target_url.SchemeIsSecure())
105 if (target_url.SchemeIsSecure() != url_.SchemeIsSecure())
107 if (base::Time::Now() > expiration_)
112 //------------------------------------------------------------------------------
113 // Security functions restricting loads and use of dictionaries.
// Validates the header attributes of a dictionary fetched from
// |dictionary_url| before it is stored. Each rejection reports a distinct
// problem code through SdchErrorRecovery():
//   - empty |domain|                            -> DICTIONARY_MISSING_DOMAIN_SPECIFIER
//   - GetDomainAndRegistry(...) yields empty    -> DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN
//   - |dictionary_url| fails DomainMatch        -> DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL
//   - host is <prefix><domain> and the first '.' in the host lies before the
//     start of |domain|                         -> DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX
//   - URL port absent from |ports|              -> DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL
// |path| is not referenced by any line visible here.
// NOTE(review): several returns, the arguments to GetDomainAndRegistry and the
// first half of the port condition are not visible in this view. Also,
// rfind() yields npos when |domain| does not occur in the host, and the
// postfix test then relies on unsigned wrap-around - worth confirming that
// this is intended.
116 bool SdchManager::Dictionary::CanSet(const std::string& domain,
117 const std::string& path,
118 const std::set<int>& ports,
119 const GURL& dictionary_url) {
121 A dictionary is invalid and must not be stored if any of the following are
123 1. The dictionary has no Domain attribute.
124 2. The effective host name that derives from the referer URL host name does
125 not domain-match the Domain attribute.
126 3. The Domain attribute is a top level domain.
127 4. The referer URL host is a host domain name (not IP address) and has the
128 form HD, where D is the value of the Domain attribute, and H is a string
129 that contains one or more dots.
130 5. If the dictionary has a Port attribute and the referer URL's port was not
134 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
135 // and hence the conservative approach is to not allow any redirects (if there
136 // were any... then don't allow the dictionary to be set).
138 if (domain.empty()) {
139 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
140 return false; // Domain is required.
142 if (registry_controlled_domains::GetDomainAndRegistry(
144 registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES).empty()) {
145 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
146 return false; // domain was a TLD.
148 if (!Dictionary::DomainMatch(dictionary_url, domain)) {
149 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
153 std::string referrer_url_host = dictionary_url.host();
154 size_t postfix_domain_index = referrer_url_host.rfind(domain);
155 // See if it is indeed a postfix, or just an internal string.
156 if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
157 // It is a postfix... so check to see if there's a dot in the prefix.
158 size_t end_of_host_index = referrer_url_host.find_first_of('.');
159 if (referrer_url_host.npos != end_of_host_index &&
160 end_of_host_index < postfix_domain_index) {
161 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
167 && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
168 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
// Decides whether this dictionary may be used to decode a response for
// |referring_url|. Each failed check reports a problem code through
// SdchErrorRecovery(): wrong domain, port not in ports_, wrong path (checked
// only when path_ is non-empty), a secure URL while secure-scheme support is
// disabled, a secure-ness mismatch between |referring_url| and url_ (both
// scheme failures report DICTIONARY_FOUND_HAS_WRONG_SCHEME), and finally a URL
// that is neither HTTP nor HTTPS.
// NOTE(review): the return statements and the first half of the port condition
// are not visible in this view - presumably each failure returns false.
// Confirm.
175 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
177 1. The request URL's host name domain-matches the Domain attribute of the
179 2. If the dictionary has a Port attribute, the request port is one of the
180 ports listed in the Port attribute.
181 3. The request URL path-matches the path attribute of the dictionary.
182 4. The request is not an HTTPS request.
183 We can override (ignore) item (4) only when we have explicitly enabled
184 HTTPS support AND the dictionary acquisition scheme matches the target
187 if (!DomainMatch(referring_url, domain_)) {
188 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
192 && 0 == ports_.count(referring_url.EffectiveIntPort())) {
193 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
196 if (path_.size() && !PathMatch(referring_url.path(), path_)) {
197 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
200 if (!SdchManager::secure_scheme_supported() &&
201 referring_url.SchemeIsSecure()) {
202 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
205 if (referring_url.SchemeIsSecure() != url_.SchemeIsSecure()) {
206 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
210 // TODO(jar): Remove overly restrictive failsafe test (added per security
211 // review) when we have a need to be more general.
212 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
213 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
// Tests whether |path| path-matches |restriction|. Equality is checked first;
// a |restriction| longer than |path|, or one that is not a prefix of |path|,
// cannot match. Otherwise the result is true exactly when |restriction| ends
// in '/' or the character of |path| immediately after the prefix is '/'.
// NOTE(review): the results of the equality and prefix-mismatch checks are not
// visible in this view (presumably true and false respectively). The final
// line reads restriction[prefix_length - 1], which is out of range for an
// empty |restriction|; the callers visible in this file only call PathMatch
// when path_ is non-empty.
220 bool SdchManager::Dictionary::PathMatch(const std::string& path,
221 const std::string& restriction) {
224 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
225 character following P2 in P1 is "/".
227 if (path == restriction)
229 size_t prefix_length = restriction.size();
230 if (prefix_length > path.size())
231 return false; // Can't be a prefix.
232 if (0 != path.compare(0, prefix_length, restriction))
234 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
// Returns whether |gurl| is within the domain |restriction|, by forwarding the
// restriction's bytes and length to GURL::DomainIs().
238 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
239 const std::string& restriction) {
240 // TODO(jar): This is not precisely a domain match definition.
241 return gurl.DomainIs(restriction.data(), restriction.size());
244 //------------------------------------------------------------------------------
// Constructs the manager; in debug builds, asserts the thread checker accepts
// the constructing thread.
245 SdchManager::SdchManager() {
246 DCHECK(thread_checker_.CalledOnValidThread());
// Destroys the manager, emptying dictionaries_ one entry at a time (each pass
// erases the key of the current first element).
249 SdchManager::~SdchManager() {
250 DCHECK(thread_checker_.CalledOnValidThread());
251 while (!dictionaries_.empty()) {
252 DictionaryMap::iterator it = dictionaries_.begin();
253 dictionaries_.erase(it->first);
// Drops all blacklist entries, all latency-experiment hosts and all stored
// dictionaries, then notifies every registered observer through
// OnClearDictionaries(this).
257 void SdchManager::ClearData() {
258 blacklisted_domains_.clear();
259 allow_latency_experiment_.clear();
261 // Note that this may result in not having dictionaries we've advertised
262 // for incoming responses. The window is relatively small (as ClearData()
263 // is not expected to be called frequently), so we rely on meta-refresh
264 // to handle this case.
265 dictionaries_.clear();
267 FOR_EACH_OBSERVER(SdchObserver, observers_, OnClearDictionaries(this));
// Records |problem| in the "Sdch3.ProblemCodes_4" enumeration histogram, with
// MAX_PROBLEM_CODE as the histogram boundary.
271 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
272 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
// Sets the static g_sdch_enabled_ switch, which IsInSupportedDomain() consults
// first.
276 void SdchManager::EnableSdchSupport(bool enabled) {
277 g_sdch_enabled_ = enabled;
// Sets the static g_secure_scheme_supported_ switch.
// NOTE(review): presumably this is the value secure_scheme_supported()
// reports; that accessor is not defined in this view.
281 void SdchManager::EnableSecureSchemeSupport(bool enabled) {
282 g_secure_scheme_supported_ = enabled;
// Temporarily blacklists the host of |url| (keyed by its lower-cased form) and
// removes it from the latency experiment. A host whose count is still
// positive is left unchanged. Otherwise exponential_count grows to
// 2 * previous + 1, clamped to INT_MAX so the arithmetic cannot overflow, and
// becomes the new count; |blacklist_reason| is stored alongside it.
// NOTE(review): the line separating the clamp from the doubling (presumably an
// `else`) is not visible in this view.
285 void SdchManager::BlacklistDomain(const GURL& url,
286 ProblemCodes blacklist_reason) {
287 SetAllowLatencyExperiment(url, false);
// operator[] creates the entry on first use.
289 BlacklistInfo* blacklist_info =
290 &blacklisted_domains_[base::StringToLowerASCII(url.host())];
292 if (blacklist_info->count > 0)
293 return; // Domain is already blacklisted.
295 if (blacklist_info->exponential_count > (INT_MAX - 1) / 2) {
296 blacklist_info->exponential_count = INT_MAX;
298 blacklist_info->exponential_count =
299 blacklist_info->exponential_count * 2 + 1;
302 blacklist_info->count = blacklist_info->exponential_count;
303 blacklist_info->reason = blacklist_reason;
// Blacklists the host of |url| with both count and exponential_count pinned to
// INT_MAX, records |blacklist_reason|, and removes the host from the latency
// experiment. The entry is keyed by the lower-cased host.
306 void SdchManager::BlacklistDomainForever(const GURL& url,
307 ProblemCodes blacklist_reason) {
308 SetAllowLatencyExperiment(url, false);
310 BlacklistInfo* blacklist_info =
311 &blacklisted_domains_[base::StringToLowerASCII(url.host())];
312 blacklist_info->count = INT_MAX;
313 blacklist_info->exponential_count = INT_MAX;
314 blacklist_info->reason = blacklist_reason;
// Removes every entry from blacklisted_domains_.
317 void SdchManager::ClearBlacklistings() {
318 blacklisted_domains_.clear();
// Resets the blacklist count and reason for |domain| (lower-cased).
// exponential_count is left untouched. Because the entry is reached through
// operator[], one is created if |domain| had none.
321 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
322 BlacklistInfo* blacklist_info = &blacklisted_domains_[
323 base::StringToLowerASCII(domain)];
324 blacklist_info->count = 0;
325 blacklist_info->reason = MIN_PROBLEM_CODE;
// Returns the current blacklist count stored for |domain| (lower-cased).
// NOTE(review): the value returned when |domain| has no entry is not visible
// in this view - presumably 0. Confirm.
328 int SdchManager::BlackListDomainCount(const std::string& domain) {
329 std::string domain_lower(base::StringToLowerASCII(domain));
331 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower))
333 return blacklisted_domains_[domain_lower].count;
// Returns the exponential_count stored for |domain| (lower-cased).
// NOTE(review): the value returned when |domain| has no entry is not visible
// in this view - presumably 0. Confirm.
336 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
337 std::string domain_lower(base::StringToLowerASCII(domain));
339 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower))
341 return blacklisted_domains_[domain_lower].exponential_count;
// Reports whether SDCH may be used for |url|. It consults, in order, the
// static enable switch, the secure-scheme policy, and the blacklist entry for
// the lower-cased host. This is not a pure query: when the host is found with
// a non-zero count, the reason is logged to "Sdch3.BlacklistReason",
// DOMAIN_BLACKLIST_INCLUDES_TARGET is reported, and the stored count is
// lowered (or cleared together with the reason).
// NOTE(review): the return statements and the condition that chooses between
// storing the decremented count and clearing the entry are not visible in
// this view. Confirm against the full file.
344 bool SdchManager::IsInSupportedDomain(const GURL& url) {
345 DCHECK(thread_checker_.CalledOnValidThread());
346 if (!g_sdch_enabled_ )
349 if (!secure_scheme_supported() && url.SchemeIsSecure())
352 if (blacklisted_domains_.empty())
355 DomainBlacklistInfo::iterator it =
356 blacklisted_domains_.find(base::StringToLowerASCII(url.host()));
357 if (blacklisted_domains_.end() == it || it->second.count == 0)
360 UMA_HISTOGRAM_ENUMERATION("Sdch3.BlacklistReason", it->second.reason,
362 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
364 int count = it->second.count - 1;
366 it->second.count = count;
368 it->second.count = 0;
369 it->second.reason = MIN_PROBLEM_CODE;
// Forwards a dictionary request to the observers by calling
// OnGetDictionary(this, request_url, dictionary_url) on each of them.
// CanFetchDictionary() is consulted first.
// NOTE(review): the statement taken when that check fails is not visible in
// this view - presumably an early return. Confirm.
375 void SdchManager::OnGetDictionary(const GURL& request_url,
376 const GURL& dictionary_url) {
377 if (!CanFetchDictionary(request_url, dictionary_url))
380 FOR_EACH_OBSERVER(SdchObserver,
382 OnGetDictionary(this, request_url, dictionary_url));
// Decides whether a dictionary at |dictionary_url| may be fetched on behalf of
// a page at |referring_url|. Three checks each report a problem code through
// SdchErrorRecovery(): the two URLs must have identical host and scheme
// (DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST); a secure referrer is refused
// while secure-scheme support is off (DICTIONARY_SELECTED_FOR_SSL); and the
// referrer must be HTTP or HTTPS (DICTIONARY_SELECTED_FROM_NON_HTTP).
// NOTE(review): the return statements are not visible in this view -
// presumably false after each report and true at the end. Confirm.
385 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
386 const GURL& dictionary_url) const {
387 DCHECK(thread_checker_.CalledOnValidThread());
388 /* The user agent may retrieve a dictionary from the dictionary URL if all of
389 the following are true:
390 1 The dictionary URL host name matches the referrer URL host name and
392 2 The dictionary URL host name domain matches the parent domain of the
393 referrer URL host name
394 3 The parent domain of the referrer URL host name is not a top level
397 // Item (1) above implies item (2). Spec should be updated.
398 // I take "host name match" to be "is identical to"
399 if (referring_url.host() != dictionary_url.host() ||
400 referring_url.scheme() != dictionary_url.scheme()) {
401 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
404 if (!secure_scheme_supported() && referring_url.SchemeIsSecure()) {
405 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
409 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
410 // than current SDCH spec when needed, and justified by security audit.
411 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
412 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
// Looks up the dictionary stored under |server_hash| and, when the domain of
// |referring_url| is supported and the dictionary's CanUse() accepts the URL,
// hands it back through |*dictionary|.
// NOTE(review): what happens on a lookup miss or a failed check is not visible
// in this view (presumably |*dictionary| is cleared or left alone and the
// function returns). Confirm. Note that IsInSupportedDomain() may modify the
// blacklist entry for the host as a side effect.
419 void SdchManager::GetVcdiffDictionary(
420 const std::string& server_hash,
421 const GURL& referring_url,
422 scoped_refptr<Dictionary>* dictionary) {
423 DCHECK(thread_checker_.CalledOnValidThread());
425 DictionaryMap::iterator it = dictionaries_.find(server_hash);
426 if (it == dictionaries_.end()) {
429 scoped_refptr<Dictionary> matching_dictionary = it->second;
430 if (!IsInSupportedDomain(referring_url))
432 if (!matching_dictionary->CanUse(referring_url))
434 *dictionary = matching_dictionary;
437 // TODO(jar): If we have evictions from the dictionaries_, then we need to
438 // change this interface to return a list of reference counted Dictionary
439 // instances that can be used if/when a server specifies one.
// Appends to |list| the client hash of each stored dictionary that may be
// advertised for |target_url|, then records the number advertised in the
// "Sdch3.Advertisement_Count" histogram. IsInSupportedDomain() is evaluated
// once per stored dictionary, inside the loop.
// NOTE(review): the |list| parameter declaration, the |count| bookkeeping, any
// separator written between hashes, and the statements taken when a check
// fails are not visible in this view.
440 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
442 DCHECK(thread_checker_.CalledOnValidThread());
444 for (DictionaryMap::iterator it = dictionaries_.begin();
445 it != dictionaries_.end(); ++it) {
446 if (!IsInSupportedDomain(target_url))
448 if (!it->second->CanAdvertise(target_url))
453 list->append(it->second->client_hash());
455 // Watch to see if we have corrupt or numerous dictionaries.
457 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
// Derives the two identifiers of a dictionary from the SHA-256 digest of
// |dictionary_text|: bytes 0-5 of the digest become |*client_hash| and bytes
// 6-11 become |*server_hash|, each passed through UrlSafeBase64Encode(). The
// debug checks assert that both encoded strings are 8 characters long.
461 void SdchManager::GenerateHash(const std::string& dictionary_text,
462 std::string* client_hash, std::string* server_hash) {
463 char binary_hash[32];
464 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
// Each hash is built from 6 raw bytes (48 bits) of the digest.
466 std::string first_48_bits(&binary_hash[0], 6);
467 std::string second_48_bits(&binary_hash[6], 6);
468 UrlSafeBase64Encode(first_48_bits, client_hash);
469 UrlSafeBase64Encode(second_48_bits, server_hash);
471 DCHECK_EQ(server_hash->length(), 8u);
472 DCHECK_EQ(client_hash->length(), 8u);
475 //------------------------------------------------------------------------------
476 // Methods for supporting latency experiments.
// Returns true when the host of |url| is present in allow_latency_experiment_.
// The host is looked up exactly as returned by url.host(), without the
// lower-casing applied to blacklist keys.
478 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
479 DCHECK(thread_checker_.CalledOnValidThread());
480 return allow_latency_experiment_.end() !=
481 allow_latency_experiment_.find(url.host());
// Adds the host of |url| to, or removes it from, allow_latency_experiment_.
// Removing a host that is present reports LATENCY_TEST_DISALLOWED through
// SdchErrorRecovery(); removing an absent host does nothing.
// NOTE(review): the branch on |enable| is not visible in this view -
// presumably the insert runs when |enable| is true and is followed by a
// return. Confirm.
484 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
485 DCHECK(thread_checker_.CalledOnValidThread());
487 allow_latency_experiment_.insert(url.host());
490 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
491 if (allow_latency_experiment_.end() == it)
492 return; // It was already erased, or never allowed.
493 SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
494 allow_latency_experiment_.erase(it);
// Registers |observer| with observers_, the list notified by ClearData() and
// OnGetDictionary().
497 void SdchManager::AddObserver(SdchObserver* observer) {
498 observers_.AddObserver(observer);
// Unregisters |observer| from observers_.
501 void SdchManager::RemoveObserver(SdchObserver* observer) {
502 observers_.RemoveObserver(observer);
// Parses, validates and stores a dictionary downloaded from |dictionary_url|.
//
// |dictionary_text| must begin with a header of "name: value" lines that ends
// at the first blank line ("\n\n"); the rest is the dictionary payload. The
// header names recognised (case-insensitively) are domain, path,
// format-version, max-age and port. The dictionary is rejected, with a problem
// code reported through SdchErrorRecovery(), when it is already loaded, empty,
// lacks a header, has a header line without a colon, is larger than
// kMaxDictionarySize, or when kMaxDictionaryCount dictionaries are already
// stored. IsInSupportedDomain() and Dictionary::CanSet() must also accept the
// trailing-dot-stripped URL. On success the new Dictionary is stored in
// dictionaries_ under its server hash.
//
// NOTE(review): the loop header, the declarations of |ports|, |seconds| and
// |port|, the assignments in the domain/path/format-version/port branches and
// several return/break statements are not visible in this view.
505 void SdchManager::AddSdchDictionary(const std::string& dictionary_text,
506 const GURL& dictionary_url) {
507 DCHECK(thread_checker_.CalledOnValidThread());
508 std::string client_hash;
509 std::string server_hash;
510 GenerateHash(dictionary_text, &client_hash, &server_hash);
511 if (dictionaries_.find(server_hash) != dictionaries_.end()) {
512 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
513 return; // Already loaded.
516 std::string domain, path;
// Default lifetime of 30 days; replaced below when a max-age line is parsed.
518 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
520 if (dictionary_text.empty()) {
521 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
522 return; // Missing header.
// The header ends at the first empty line.
525 size_t header_end = dictionary_text.find("\n\n");
526 if (std::string::npos == header_end) {
527 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
528 return; // Missing header.
530 size_t line_start = 0; // Start of line being parsed.
532 size_t line_end = dictionary_text.find('\n', line_start);
533 DCHECK(std::string::npos != line_end);
534 DCHECK_LE(line_end, header_end);
536 size_t colon_index = dictionary_text.find(':', line_start);
537 if (std::string::npos == colon_index) {
538 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
539 return; // Illegal line missing a colon.
542 if (colon_index > line_end)
// Skip spaces and tabs to find where the value begins.
545 size_t value_start = dictionary_text.find_first_not_of(" \t",
547 if (std::string::npos != value_start) {
548 if (value_start >= line_end)
550 std::string name(dictionary_text, line_start, colon_index - line_start);
551 std::string value(dictionary_text, value_start, line_end - value_start);
// Header names are compared in lower case.
552 name = base::StringToLowerASCII(name);
553 if (name == "domain") {
555 } else if (name == "path") {
557 } else if (name == "format-version") {
560 } else if (name == "max-age") {
// NOTE(review): the result of StringToInt64 is not checked here.
562 base::StringToInt64(value, &seconds);
563 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
564 } else if (name == "port") {
// NOTE(review): the result of StringToInt is not checked here.
566 base::StringToInt(value, &port);
572 if (line_end >= header_end)
574 line_start = line_end + 1;
577 // Narrow fix for http://crbug.com/389451.
578 GURL dictionary_url_normalized(dictionary_url);
579 StripTrailingDot(&dictionary_url_normalized);
581 if (!IsInSupportedDomain(dictionary_url_normalized))
584 if (!Dictionary::CanSet(domain, path, ports, dictionary_url_normalized))
587 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
588 // useless dictionaries. We should probably have a cache eviction plan,
589 // instead of just blocking additions. For now, with the spec in flux, it
590 // is probably not worth doing eviction handling.
591 if (kMaxDictionarySize < dictionary_text.size()) {
592 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
595 if (kMaxDictionaryCount <= dictionaries_.size()) {
596 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
600 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
601 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
602 << " and server hash " << server_hash;
// header_end + 2 skips the two newlines, so the payload starts right after
// the header.
603 Dictionary* dictionary =
604 new Dictionary(dictionary_text, header_end + 2, client_hash,
605 dictionary_url_normalized, domain,
606 path, expiration, ports);
607 dictionaries_[server_hash] = dictionary;
// Writes the base64 encoding of |input| to |*output|, then rewrites it in
// place with '+' replaced by '-' and '/' replaced by '_'.
612 void SdchManager::UrlSafeBase64Encode(const std::string& input,
613 std::string* output) {
614 // Since this is only done during a dictionary load, and hashes are only 8
615 // characters, we just do the simple fixup, rather than rewriting the encoder.
616 base::Base64Encode(input, output);
617 std::replace(output->begin(), output->end(), '+', '-');
618 std::replace(output->begin(), output->end(), '/', '_');