1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
18 //------------------------------------------------------------------------------
// Limits on stored SDCH dictionaries. kMaxDictionaryCount bounds the number of
// entries in dictionaries_ and kMaxDictionarySize bounds the length of one
// dictionary's text (both are enforced in AddSdchDictionary()). Two sets of
// values are defined: 1 / 500 * 1000 for the mobile guard below and
// 20 / 1000 * 1000 otherwise.
// NOTE(review): the directive separating the two sets (presumably #else /
// #endif) is not visible here -- confirm.
21 // Adjust SDCH limits downwards for mobile.
22 #if defined(OS_ANDROID) || defined(OS_IOS)
24 const size_t SdchManager::kMaxDictionaryCount = 1;
25 const size_t SdchManager::kMaxDictionarySize = 500 * 1000;
28 const size_t SdchManager::kMaxDictionaryCount = 20;
29 const size_t SdchManager::kMaxDictionarySize = 1000 * 1000;
// Static flag written by EnableSdchSupport() and read by
// IsInSupportedDomain(); starts out enabled.
33 bool SdchManager::g_sdch_enabled_ = true;
// Static flag written by EnableSecureSchemeSupport(); starts out disabled.
36 bool SdchManager::g_secure_scheme_supported_ = false;
38 //------------------------------------------------------------------------------
// Builds a Dictionary from a fetched dictionary blob and its parsed header
// attributes. text_ is initialized from |dictionary_text| and |offset|;
// AddSdchDictionary() passes header_end + 2 as the offset, i.e. the position
// just past the "\n\n" that terminates the header.
// NOTE(review): text_ is presumably a std::string holding the tail of
// |dictionary_text| from |offset| on -- confirm against the class declaration.
// NOTE(review): some parameters and member initializers are not visible here;
// only the client hash and expiration initializers can be confirmed.
39 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
41 const std::string& client_hash,
43 const std::string& domain,
44 const std::string& path,
45 const base::Time& expiration,
46 const std::set<int>& ports)
47 : text_(dictionary_text, offset),
48 client_hash_(client_hash),
52 expiration_(expiration),
// Destructor for Dictionary.
// NOTE(review): the body is not visible here; presumably empty -- confirm.
56 SdchManager::Dictionary::~Dictionary() {
// Decides whether this dictionary may be offered to the server for a request
// to |target_url|. The visible checks run in this order: domain match, port
// membership (only when ports_ is non-empty), path match (only when path_ is
// non-empty), the global secure-scheme switch, secure/non-secure parity
// between |target_url| and the dictionary's own url_, and finally expiration
// against the current time.
// NOTE(review): the outcome of each failed check is not visible here;
// presumably each one returns false -- confirm.
59 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
60 /* The specific rules of when a dictionary should be advertised in an
61 Avail-Dictionary header are modeled after the rules for cookie scoping. The
62 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
63 dictionary may be advertised in the Avail-Dictionaries header exactly when
64 all of the following are true:
65 1. The server's effective host name domain-matches the Domain attribute of
67 2. If the dictionary has a Port attribute, the request port is one of the
68 ports listed in the Port attribute.
69 3. The request URI path-matches the path header of the dictionary.
70 4. The request is not an HTTPS request.
71 We can override (ignore) item (4) only when we have explicitly enabled
72 HTTPS support AND the dictionary acquisition scheme matches the target
// Rule 1: the target host must domain-match the stored Domain attribute.
75 if (!DomainMatch(target_url, domain_))
// Rule 2: an empty ports_ set is treated as "no Port attribute", so the port
// test only applies when at least one port was listed.
77 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
// Rule 3: an empty path_ skips the path test entirely.
79 if (path_.size() && !PathMatch(target_url.path(), path_))
// Rule 4: secure targets are refused unless secure-scheme support is on.
81 if (!SdchManager::secure_scheme_supported() && target_url.SchemeIsSecure())
// Rule 4 override: the target and the dictionary's source URL must agree on
// secure vs. non-secure.
83 if (target_url.SchemeIsSecure() != url_.SchemeIsSecure())
// Beyond the numbered rules: the stored expiration is compared with now.
85 if (base::Time::Now() > expiration_)
90 //------------------------------------------------------------------------------
91 // Security functions restricting loads and use of dictionaries.
// Validates the header attributes of a freshly fetched dictionary against the
// URL it was fetched from (|dictionary_url|) before it may be stored. Each
// visible rejection records a distinct problem code via SdchErrorRecovery().
// |path| is not referenced by any check visible in this function.
// NOTE(review): the return statements for most branches, and the final
// success return, are not visible here; presumably rejections return false and
// the fall-through returns true -- confirm.
94 bool SdchManager::Dictionary::CanSet(const std::string& domain,
95 const std::string& path,
96 const std::set<int>& ports,
97 const GURL& dictionary_url) {
99 A dictionary is invalid and must not be stored if any of the following are
101 1. The dictionary has no Domain attribute.
102 2. The effective host name that derives from the referer URL host name does
103 not domain-match the Domain attribute.
104 3. The Domain attribute is a top level domain.
105 4. The referer URL host is a host domain name (not IP address) and has the
106 form HD, where D is the value of the Domain attribute, and H is a string
107 that contains one or more dots.
108 5. If the dictionary has a Port attribute and the referer URL's port was not
112 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
113 // and hence the conservative approach is to not allow any redirects (if there
114 // were any... then don't allow the dictionary to be set).
// Rule 1: a Domain attribute is mandatory.
116 if (domain.empty()) {
117 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
118 return false; // Domain is required.
// Rule 3: an empty result from GetDomainAndRegistry() is treated as "the
// domain is a bare top level domain".
120 if (registry_controlled_domains::GetDomainAndRegistry(
122 registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES).empty()) {
123 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
124 return false; // domain was a TLD.
// Rule 2: the fetch URL's host must domain-match the Domain attribute.
126 if (!Dictionary::DomainMatch(dictionary_url, domain)) {
127 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
// Rule 4: locate the last occurrence of |domain| inside the host; when it
// ends exactly at the end of the host it is a suffix, and the part in front
// of it must not contain a dot.
// NOTE(review): when rfind() finds nothing it returns npos, and
// npos + domain.size() wraps around. The equality below can then still hold
// if |domain| is exactly one character longer than the host (for example a
// Domain written with a leading dot, if DomainMatch() accepts that), which
// would report DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX for any dotted host --
// confirm whether an explicit npos check is needed.
131 std::string referrer_url_host = dictionary_url.host();
132 size_t postfix_domain_index = referrer_url_host.rfind(domain);
133 // See if it is indeed a postfix, or just an internal string.
134 if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
135 // It is a postfix... so check to see if there's a dot in the prefix.
136 size_t end_of_host_index = referrer_url_host.find_first_of('.');
137 if (referrer_url_host.npos != end_of_host_index &&
138 end_of_host_index < postfix_domain_index) {
139 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
// Rule 5: the fetch URL's effective port must appear in |ports|.
// NOTE(review): the first half of this condition is not visible here;
// presumably it skips the test when |ports| is empty -- confirm.
145 && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
146 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
// Decides whether this dictionary may be used to decode a response for
// |referring_url|. Mirrors the checks in CanAdvertise() (domain, port, path,
// secure-scheme switch, secure/non-secure parity with url_) but, unlike
// CanAdvertise(), records a problem code for every failure and additionally
// requires an HTTP or HTTPS scheme. No expiration check is visible here.
// NOTE(review): the return statements are not visible here; presumably each
// failure returns false and the fall-through returns true -- confirm.
153 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
155 1. The request URL's host name domain-matches the Domain attribute of the
157 2. If the dictionary has a Port attribute, the request port is one of the
158 ports listed in the Port attribute.
159 3. The request URL path-matches the path attribute of the dictionary.
160 4. The request is not an HTTPS request.
161 We can override (ignore) item (4) only when we have explicitly enabled
162 HTTPS support AND the dictionary acquisition scheme matches the target
// Rule 1: domain match.
165 if (!DomainMatch(referring_url, domain_)) {
166 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
// Rule 2: the referring URL's effective port must be in ports_.
// NOTE(review): the first half of this condition is not visible here;
// presumably it skips the test when ports_ is empty -- confirm.
170 && 0 == ports_.count(referring_url.EffectiveIntPort())) {
171 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
// Rule 3: an empty path_ skips the path test entirely.
174 if (path_.size() && !PathMatch(referring_url.path(), path_)) {
175 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
// Rule 4: secure URLs are refused unless secure-scheme support is on.
178 if (!SdchManager::secure_scheme_supported() &&
179 referring_url.SchemeIsSecure()) {
180 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
// Rule 4 override: the referring URL and the dictionary's source URL must
// agree on secure vs. non-secure; the same problem code is reused.
183 if (referring_url.SchemeIsSecure() != url_.SchemeIsSecure()) {
184 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
188 // TODO(jar): Remove overly restrictive failsafe test (added per security
189 // review) when we have a need to be more general.
190 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
191 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
// Tests whether |path| path-matches |restriction|: either the two strings are
// identical, or |restriction| is a prefix of |path| and the boundary falls on
// a '/' (the last character of |restriction| is '/', or the character of
// |path| immediately after the prefix is '/').
// NOTE(review): the results of the equality test and of the failed prefix
// comparison are not visible here; presumably true and false respectively --
// confirm.
// NOTE(review): with an empty |restriction| and a non-empty |path| the final
// line evaluates restriction[prefix_length - 1] with prefix_length == 0, which
// indexes out of range. The callers visible in this file guard against that
// with a path_.size() test, but the function itself does not.
198 bool SdchManager::Dictionary::PathMatch(const std::string& path,
199 const std::string& restriction) {
202 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
203 character following P2 in P1 is "/".
205 if (path == restriction)
207 size_t prefix_length = restriction.size();
208 if (prefix_length > path.size())
209 return false; // Can't be a prefix.
// compare() returns 0 only when the first prefix_length characters of |path|
// equal |restriction|.
210 if (0 != path.compare(0, prefix_length, restriction))
// Reached with |restriction| as a proper prefix of |path| (equality and the
// longer-than-path case were handled above), so path[prefix_length] is a
// valid index.
212 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
// Reports whether |gurl| belongs to the domain named by |restriction|. The
// decision is delegated entirely to GURL::DomainIs(), which receives the
// restriction as a pointer/length pair.
216 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
217 const std::string& restriction) {
218 // TODO(jar): This is not precisely a domain match definition.
219 return gurl.DomainIs(restriction.data(), restriction.size());
222 //------------------------------------------------------------------------------
// Constructor. The only visible work is a debug-build assertion that the
// object is being created on its valid thread.
223 SdchManager::SdchManager() {
224 DCHECK(CalledOnValidThread());
// Destructor. Empties dictionaries_ one entry at a time by repeatedly erasing
// the key of the first element until the map reports empty.
// NOTE(review): any per-entry work between fetching the iterator and the
// erase is not visible here.
227 SdchManager::~SdchManager() {
228 DCHECK(CalledOnValidThread());
229 while (!dictionaries_.empty()) {
230 DictionaryMap::iterator it = dictionaries_.begin();
231 dictionaries_.erase(it->first);
// Drops all per-session SDCH state held by this manager: the domain
// blacklist, the latency-experiment host set and every stored dictionary.
// NOTE(review): statements between the first two clears and the dictionary
// clear are not visible here.
235 void SdchManager::ClearData() {
236 blacklisted_domains_.clear();
237 allow_latency_experiment_.clear();
241 // Note that this may result in not having dictionaries we've advertised
242 // for incoming responses. The window is relatively small (as ClearData()
243 // is not expected to be called frequently), so we rely on meta-refresh
244 // to handle this case.
245 dictionaries_.clear();
// Records |problem| in the "Sdch3.ProblemCodes_4" enumeration histogram,
// using MAX_PROBLEM_CODE as the histogram boundary. No recovery action beyond
// this bookkeeping is visible here.
249 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
250 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
// Installs |fetcher| as the object FetchDictionary() schedules downloads on.
// NOTE(review): fetcher_ is used through reset()/get(), so it looks like an
// owning smart pointer; presumably this takes ownership of |fetcher| and
// releases any previous fetcher -- confirm against the declaration.
253 void SdchManager::set_sdch_fetcher(SdchFetcher* fetcher) {
254 DCHECK(CalledOnValidThread());
255 fetcher_.reset(fetcher);
// Sets the static g_sdch_enabled_ flag, which IsInSupportedDomain() consults
// before anything else.
259 void SdchManager::EnableSdchSupport(bool enabled) {
260 g_sdch_enabled_ = enabled;
// Sets the static g_secure_scheme_supported_ flag that controls whether SDCH
// is permitted on secure schemes.
264 void SdchManager::EnableSecureSchemeSupport(bool enabled) {
265 g_secure_scheme_supported_ = enabled;
// Temporarily blacklists the host of |url| with exponential back-off and
// remembers |blacklist_reason|. Also withdraws the host from the latency
// experiment. The blacklist is keyed by the lower-cased host name.
268 void SdchManager::BlacklistDomain(const GURL& url,
269 ProblemCodes blacklist_reason) {
270 SetAllowLatencyExperiment(url, false);
// operator[] creates a fresh entry when the host has never been blacklisted.
272 BlacklistInfo* blacklist_info =
273 &blacklisted_domains_[base::StringToLowerASCII(url.host())];
// A still-active blacklisting is left untouched (no extension, no new reason).
275 if (blacklist_info->count > 0)
276 return; // Domain is already blacklisted.
// Back-off step: exponential_count becomes 2 * previous + 1, pinned at
// INT_MAX once doubling would overflow.
// NOTE(review): the branch separator between the two assignments is not
// visible here; presumably an else -- confirm.
278 if (blacklist_info->exponential_count > (INT_MAX - 1) / 2) {
279 blacklist_info->exponential_count = INT_MAX;
281 blacklist_info->exponential_count =
282 blacklist_info->exponential_count * 2 + 1;
// count is the remaining budget that IsInSupportedDomain() counts down.
285 blacklist_info->count = blacklist_info->exponential_count;
286 blacklist_info->reason = blacklist_reason;
// Blacklists the host of |url| with both counters set to INT_MAX, the
// largest value the back-off scheme can represent, and records
// |blacklist_reason|. Also withdraws the host from the latency experiment.
// Unlike BlacklistDomain(), an existing entry is overwritten unconditionally.
289 void SdchManager::BlacklistDomainForever(const GURL& url,
290 ProblemCodes blacklist_reason) {
291 SetAllowLatencyExperiment(url, false);
293 BlacklistInfo* blacklist_info =
294 &blacklisted_domains_[base::StringToLowerASCII(url.host())];
295 blacklist_info->count = INT_MAX;
296 blacklist_info->exponential_count = INT_MAX;
297 blacklist_info->reason = blacklist_reason;
// Forgets every blacklisted domain, including the accumulated back-off
// counters, by emptying blacklisted_domains_.
300 void SdchManager::ClearBlacklistings() {
301 blacklisted_domains_.clear();
// Lifts the active blacklisting for |domain| (matched case-insensitively via
// lower-casing) by zeroing its count and resetting its reason.
// exponential_count is not touched, so a later BlacklistDomain() call keeps
// backing off from the previous level.
// NOTE(review): the entry is reached through operator[]; assuming map-like
// semantics, clearing a domain that was never blacklisted inserts a new
// entry for it -- confirm.
304 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
305 BlacklistInfo* blacklist_info = &blacklisted_domains_[
306 base::StringToLowerASCII(domain)];
307 blacklist_info->count = 0;
308 blacklist_info->reason = MIN_PROBLEM_CODE;
// Returns the current blacklist count stored for |domain| (lower-cased before
// lookup).
// NOTE(review): the result for a domain that is not in the table is not
// visible here; presumably 0 -- confirm.
311 int SdchManager::BlackListDomainCount(const std::string& domain) {
312 std::string domain_lower(base::StringToLowerASCII(domain));
// find() first, so that the operator[] below never inserts a new entry.
314 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower))
316 return blacklisted_domains_[domain_lower].count;
// Returns the back-off level (exponential_count) stored for |domain|
// (lower-cased before lookup).
// NOTE(review): the result for a domain that is not in the table is not
// visible here; presumably 0 -- confirm.
319 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
320 std::string domain_lower(base::StringToLowerASCII(domain));
// find() first, so that the operator[] below never inserts a new entry.
322 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower))
324 return blacklisted_domains_[domain_lower].exponential_count;
// Decides whether SDCH may be used for |url|. Checks, in order: the global
// enable flag, the secure-scheme switch, and the domain blacklist (looked up
// by lower-cased host).
// This is not a pure query: when the host is actively blacklisted it records
// the stored reason in the "Sdch3.BlacklistReason" histogram, reports
// DOMAIN_BLACKLIST_INCLUDES_TARGET, and counts the entry's budget down by
// one, so every call on a blacklisted host moves it closer to being allowed
// again.
// NOTE(review): the return values of each branch are not visible here.
327 bool SdchManager::IsInSupportedDomain(const GURL& url) {
328 DCHECK(CalledOnValidThread());
329 if (!g_sdch_enabled_ )
332 if (!secure_scheme_supported() && url.SchemeIsSecure())
// Fast path: nothing is blacklisted, so the lookup below can be skipped.
335 if (blacklisted_domains_.empty())
338 DomainBlacklistInfo::iterator it =
339 blacklisted_domains_.find(base::StringToLowerASCII(url.host()));
// An entry with count == 0 is kept only for its back-off history and does
// not block the domain.
340 if (blacklisted_domains_.end() == it || it->second.count == 0)
343 UMA_HISTOGRAM_ENUMERATION("Sdch3.BlacklistReason", it->second.reason,
345 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
// NOTE(review): the condition choosing between storing the decremented count
// and resetting the entry (count 0, reason MIN_PROBLEM_CODE) is not visible
// here; presumably it depends on whether the new count is still positive --
// confirm.
347 int count = it->second.count - 1;
349 it->second.count = count;
351 it->second.count = 0;
352 it->second.reason = MIN_PROBLEM_CODE;
// Schedules a download of |dictionary_url| on the installed fetcher, provided
// CanFetchDictionary() approves it for |request_url| and a fetcher has been
// set. CanFetchDictionary() is evaluated first, so its problem-code reporting
// happens even when no fetcher is installed.
358 void SdchManager::FetchDictionary(const GURL& request_url,
359 const GURL& dictionary_url) {
360 DCHECK(CalledOnValidThread());
361 if (CanFetchDictionary(request_url, dictionary_url) && fetcher_.get())
362 fetcher_->Schedule(dictionary_url);
// Decides whether a dictionary at |dictionary_url| may be fetched on behalf
// of a response from |referring_url|. Requires identical host and scheme,
// refuses secure referrers unless secure-scheme support is enabled, and
// refuses anything that is not HTTP or HTTPS. Each refusal records its own
// problem code.
// NOTE(review): the return statements are not visible here; presumably each
// refusal returns false and the fall-through returns true -- confirm.
365 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
366 const GURL& dictionary_url) const {
367 DCHECK(CalledOnValidThread());
368 /* The user agent may retrieve a dictionary from the dictionary URL if all of
369 the following are true:
370 1 The dictionary URL host name matches the referrer URL host name and
372 2 The dictionary URL host name domain matches the parent domain of the
373 referrer URL host name
374 3 The parent domain of the referrer URL host name is not a top level
376 4 The dictionary URL is not an HTTPS URL.
378 // Item (1) above implies item (2). Spec should be updated.
379 // I take "host name match" to be "is identical to"
// Both mismatches (host or scheme) are reported under the same
// "different host" problem code.
380 if (referring_url.host() != dictionary_url.host() ||
381 referring_url.scheme() != dictionary_url.scheme()) {
382 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
// The schemes are known to be equal at this point, so testing the referrer
// alone also covers the dictionary URL.
385 if (!secure_scheme_supported() && referring_url.SchemeIsSecure()) {
386 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
390 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
391 // than current SDCH spec when needed, and justified by security audit.
392 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
393 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
// Parses, validates and stores a downloaded dictionary.
//
// |dictionary_text| is the raw download: a header of "name: value" lines,
// terminated by an empty line ("\n\n"), followed by the dictionary payload.
// |dictionary_url| is where it was fetched from. Header names are matched
// case-insensitively; the ones recognised here are domain, path,
// format-version, max-age and port. Expiration defaults to 30 days from now
// and is replaced by now + max-age seconds when that header is present.
//
// The dictionary is rejected (with a problem code recorded) when it is
// already loaded, is empty, has no header terminator, has a header line
// without a colon, is too large, or when the dictionary count limit is
// reached. It must also pass IsInSupportedDomain() and Dictionary::CanSet().
// On success it is stored in dictionaries_ under its server hash.
// NOTE(review): several return statements, including the final success
// return, are not visible here.
400 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
401 const GURL& dictionary_url) {
402 DCHECK(CalledOnValidThread());
403 std::string client_hash;
404 std::string server_hash;
// The hashes are derived from the complete text, header included.
405 GenerateHash(dictionary_text, &client_hash, &server_hash);
406 if (dictionaries_.find(server_hash) != dictionaries_.end()) {
407 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
408 return false; // Already loaded.
411 std::string domain, path;
413 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
415 if (dictionary_text.empty()) {
416 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
417 return false; // No dictionary text at all.
// header_end is the index of the first '\n' of the terminating blank line.
420 size_t header_end = dictionary_text.find("\n\n");
421 if (std::string::npos == header_end) {
422 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
423 return false; // Missing header.
425 size_t line_start = 0; // Start of line being parsed.
// Header line loop: the DCHECKs hold because a '\n' exists at header_end and
// line_start never moves past it.
427 size_t line_end = dictionary_text.find('\n', line_start);
428 DCHECK(std::string::npos != line_end);
429 DCHECK_LE(line_end, header_end);
431 size_t colon_index = dictionary_text.find(':', line_start);
432 if (std::string::npos == colon_index) {
433 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
434 return false; // Illegal line missing a colon.
// The colon search runs to the end of the text, so a hit beyond line_end
// belongs to a later line.
437 if (colon_index > line_end)
// Skip spaces and tabs between the colon and the value.
440 size_t value_start = dictionary_text.find_first_not_of(" \t",
442 if (std::string::npos != value_start) {
443 if (value_start >= line_end)
445 std::string name(dictionary_text, line_start, colon_index - line_start);
446 std::string value(dictionary_text, value_start, line_end - value_start);
447 name = base::StringToLowerASCII(name);
448 if (name == "domain") {
450 } else if (name == "path") {
452 } else if (name == "format-version") {
455 } else if (name == "max-age") {
// NOTE(review): the conversion result is ignored, so a malformed max-age is
// not rejected at this point.
457 base::StringToInt64(value, &seconds);
458 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
459 } else if (name == "port") {
// NOTE(review): the conversion result is ignored here as well.
461 base::StringToInt(value, &port);
// Stop once the line that ends at the header terminator has been handled;
// otherwise continue with the character after this line's '\n'.
467 if (line_end >= header_end)
469 line_start = line_end + 1;
472 if (!IsInSupportedDomain(dictionary_url))
475 if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
478 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
479 // useless dictionaries. We should probably have a cache eviction plan,
480 // instead of just blocking additions. For now, with the spec in flux, it
481 // is probably not worth doing eviction handling.
// The size limit applies to the whole download, header included.
482 if (kMaxDictionarySize < dictionary_text.size()) {
483 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
486 if (kMaxDictionaryCount <= dictionaries_.size()) {
487 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
491 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
492 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
493 << " and server hash " << server_hash;
// header_end + 2 is the first payload character after the "\n\n" terminator.
494 Dictionary* dictionary =
495 new Dictionary(dictionary_text, header_end + 2, client_hash,
496 dictionary_url, domain, path, expiration, ports);
497 dictionaries_[server_hash] = dictionary;
// Looks up the dictionary stored under |server_hash| and hands it back
// through |dictionary| when it may be used for |referring_url|. The output is
// assigned only after the lookup, IsInSupportedDomain() and
// Dictionary::CanUse() have all passed.
// NOTE(review): what |dictionary| holds on the failure paths is not visible
// here; presumably it is cleared up front -- confirm.
501 void SdchManager::GetVcdiffDictionary(
502 const std::string& server_hash,
503 const GURL& referring_url,
504 scoped_refptr<Dictionary>* dictionary) {
505 DCHECK(CalledOnValidThread());
507 DictionaryMap::iterator it = dictionaries_.find(server_hash);
508 if (it == dictionaries_.end()) {
// Take a reference to the stored dictionary before running the checks.
511 scoped_refptr<Dictionary> matching_dictionary = it->second;
512 if (!IsInSupportedDomain(referring_url))
514 if (!matching_dictionary->CanUse(referring_url))
516 *dictionary = matching_dictionary;
519 // TODO(jar): If we have evictions from the dictionaries_, then we need to
520 // change this interface to return a list of reference counted Dictionary
521 // instances that can be used if/when a server specifies one.
// Builds the list of dictionaries that may be advertised for |target_url| by
// appending the client hash of every stored dictionary that passes
// IsInSupportedDomain() and Dictionary::CanAdvertise(). The number of
// advertised dictionaries is reported to the "Sdch3.Advertisement_Count"
// histogram.
// NOTE(review): the output parameter's declaration, the separator placed
// between hashes and the maintenance of |count| are not visible here.
// NOTE(review): IsInSupportedDomain() is re-evaluated on every iteration
// although |target_url| does not change, and it counts down the blacklist
// budget of a blacklisted host each time it is called. Confirm that the loop
// stops after the first failure, or hoist the check out of the loop.
522 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
524 DCHECK(CalledOnValidThread());
526 for (DictionaryMap::iterator it = dictionaries_.begin();
527 it != dictionaries_.end(); ++it) {
528 if (!IsInSupportedDomain(target_url))
530 if (!it->second->CanAdvertise(target_url))
535 list->append(it->second->client_hash());
537 // Watch to see if we have corrupt or numerous dictionaries.
539 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
// Derives the two identifiers of a dictionary from its full text. The text is
// hashed with SHA-256; bytes 0-5 of the digest become the client hash and
// bytes 6-11 become the server hash. Each 6-byte (48-bit) slice is encoded
// with UrlSafeBase64Encode(), which the DCHECKs expect to yield exactly 8
// characters.
543 void SdchManager::GenerateHash(const std::string& dictionary_text,
544 std::string* client_hash, std::string* server_hash) {
// 32 bytes holds a complete SHA-256 digest.
545 char binary_hash[32];
546 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
548 std::string first_48_bits(&binary_hash[0], 6);
549 std::string second_48_bits(&binary_hash[6], 6);
550 UrlSafeBase64Encode(first_48_bits, client_hash);
551 UrlSafeBase64Encode(second_48_bits, server_hash);
553 DCHECK_EQ(server_hash->length(), 8u);
554 DCHECK_EQ(client_hash->length(), 8u);
557 //------------------------------------------------------------------------------
558 // Methods for supporting latency experiments.
// Returns true when the host of |url| is currently in the
// allow_latency_experiment_ set. The host is looked up exactly as
// url.host() returns it.
560 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
561 DCHECK(CalledOnValidThread());
562 return allow_latency_experiment_.end() !=
563 allow_latency_experiment_.find(url.host());
// Adds the host of |url| to, or removes it from, the latency-experiment set.
// Removing a host that is actually present records LATENCY_TEST_DISALLOWED;
// removing an absent host is a silent no-op.
// NOTE(review): the test on |enable| and the end of the enabling branch are
// not visible here; presumably the insert runs only when |enable| is true and
// then returns -- confirm.
566 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
567 DCHECK(CalledOnValidThread());
569 allow_latency_experiment_.insert(url.host());
572 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
573 if (allow_latency_experiment_.end() == it)
574 return; // It was already erased, or never allowed.
575 SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
576 allow_latency_experiment_.erase(it);
580 void SdchManager::UrlSafeBase64Encode(const std::string& input,
581 std::string* output) {
582 // Since this is only done during a dictionary load, and hashes are only 8
583 // characters, we just do the simple fixup, rather than rewriting the encoder.
584 base::Base64Encode(input, output);
585 for (size_t i = 0; i < output->size(); ++i) {
586 switch (output->data()[i]) {