- add sources.
[platform/framework/web/crosswalk.git] / src / content / browser / storage_partition_impl_map.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/storage_partition_impl_map.h"
6
7 #include "base/bind.h"
8 #include "base/callback.h"
9 #include "base/file_util.h"
10 #include "base/files/file_enumerator.h"
11 #include "base/files/file_path.h"
12 #include "base/stl_util.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/threading/sequenced_worker_pool.h"
17 #include "content/browser/appcache/chrome_appcache_service.h"
18 #include "content/browser/fileapi/browser_file_system_helper.h"
19 #include "content/browser/fileapi/chrome_blob_storage_context.h"
20 #include "content/browser/loader/resource_request_info_impl.h"
21 #include "content/browser/resource_context_impl.h"
22 #include "content/browser/storage_partition_impl.h"
23 #include "content/browser/streams/stream.h"
24 #include "content/browser/streams/stream_context.h"
25 #include "content/browser/streams/stream_registry.h"
26 #include "content/browser/streams/stream_url_request_job.h"
27 #include "content/browser/webui/url_data_manager_backend.h"
28 #include "content/public/browser/browser_context.h"
29 #include "content/public/browser/browser_thread.h"
30 #include "content/public/browser/content_browser_client.h"
31 #include "content/public/browser/storage_partition.h"
32 #include "content/public/common/content_constants.h"
33 #include "content/public/common/url_constants.h"
34 #include "crypto/sha2.h"
35 #include "net/url_request/url_request_context.h"
36 #include "net/url_request/url_request_context_getter.h"
37 #include "webkit/browser/blob/blob_storage_context.h"
38 #include "webkit/browser/blob/blob_url_request_job_factory.h"
39 #include "webkit/browser/fileapi/file_system_url_request_job_factory.h"
40 #include "webkit/common/blob/blob_data.h"
41
42 using appcache::AppCacheService;
43 using fileapi::FileSystemContext;
44 using webkit_blob::BlobStorageContext;
45
46 namespace content {
47
48 namespace {
49
50 // A derivative that knows about Streams too.
//
// A request whose URL names a Stream registered in |stream_context_| is
// answered with a StreamURLRequestJob; every other request is forwarded to a
// webkit_blob::BlobProtocolHandler that is created on first use.
51 class BlobProtocolHandler : public net::URLRequestJobFactory::ProtocolHandler {
52  public:
  // Stores a reference to each of the three contexts (see the scoped_refptr
  // members below). No other work is done at construction time.
53   BlobProtocolHandler(ChromeBlobStorageContext* blob_storage_context,
54                       StreamContext* stream_context,
55                       fileapi::FileSystemContext* file_system_context)
56       : blob_storage_context_(blob_storage_context),
57         stream_context_(stream_context),
58         file_system_context_(file_system_context) {
59   }
60
61   virtual ~BlobProtocolHandler() {
62   }
63
  // Returns a new StreamURLRequestJob when |request|'s URL is found in the
  // stream registry; otherwise returns whatever the wrapped
  // webkit_blob::BlobProtocolHandler returns for |request|.
64   virtual net::URLRequestJob* MaybeCreateJob(
65       net::URLRequest* request,
66       net::NetworkDelegate* network_delegate) const OVERRIDE {
    // Streams take precedence over blobs.
67     scoped_refptr<Stream> stream =
68         stream_context_->registry()->GetStream(request->url());
69     if (stream.get())
70       return new StreamURLRequestJob(request, network_delegate, stream);
71
72     if (!blob_protocol_handler_) {
73       // Construction is deferred because 'this' is constructed on
74       // the main thread but we want blob_protocol_handler_ constructed
75       // on the IO thread.
76       blob_protocol_handler_.reset(
77           new webkit_blob::BlobProtocolHandler(
78               blob_storage_context_->context(),
79               file_system_context_,
80               BrowserThread::GetMessageLoopProxyForThread(
81                   BrowserThread::FILE).get()));
82     }
83     return blob_protocol_handler_->MaybeCreateJob(request, network_delegate);
84   }
85
86  private:
87   const scoped_refptr<ChromeBlobStorageContext> blob_storage_context_;
88   const scoped_refptr<StreamContext> stream_context_;
89   const scoped_refptr<fileapi::FileSystemContext> file_system_context_;
  // Created lazily inside MaybeCreateJob(); mutable because that method is
  // const.
90   mutable scoped_ptr<webkit_blob::BlobProtocolHandler> blob_protocol_handler_;
91   DISALLOW_COPY_AND_ASSIGN(BlobProtocolHandler);
92 };
93
94 // These constants are used to create the directory structure under the profile
95 // where renderers with a non-default storage partition keep their persistent
96 // state. This will contain a set of directories that partially mirror the
97 // directory structure of BrowserContext::GetPath().
98 //
99 // The kStoragePartitionDirname contains an extensions directory which is
100 // further partitioned by extension id, followed by another level of directories
101 // for the "default" extension storage partition and one directory for each
102 // persistent partition used by a webview tag. Example:
103 //
104 //   Storage/ext/ABCDEF/def
105 //   Storage/ext/ABCDEF/hash(partition name)
106 //
107 // The code in GetStoragePartitionPath() constructs these path names.
// (GetStoragePartitionDomainPath() builds the Storage/ext/<domain> prefix.)
108 //
109 // TODO(nasko): Move extension related path code out of content.
110 const base::FilePath::CharType kStoragePartitionDirname[] =
111     FILE_PATH_LITERAL("Storage");
112 const base::FilePath::CharType kExtensionsDirname[] =
113     FILE_PATH_LITERAL("ext");
114 const base::FilePath::CharType kDefaultPartitionDirname[] =
115     FILE_PATH_LITERAL("def");
// Name handed to CreateTemporaryDirInDir() when BlockingGarbageCollect()
// creates its trash directory inside the storage root.
116 const base::FilePath::CharType kTrashDirname[] =
117     FILE_PATH_LITERAL("trash");
118
119 // Because partition names are user specified, they can be arbitrarily long
120 // which makes them unsuitable for path names. We use a truncation of a
121 // SHA256 hash to perform a deterministic shortening of the string. The
122 // kPartitionNameHashBytes constant controls the length of the truncation.
123 // We use 6 bytes, which gives us 99.999% reliability against collisions over
124 // 1 million partition domains.
125 //
126 // Analysis:
127 // We assume that all partition names within one partition domain are
128 // controlled by the same entity. Thus there is no chance for adversarial
129 // attack and all we care about is accidental collision. To get 5 9s over
130 // 1 million domains, we need the probability of a collision in any one domain
131 // to be
132 //
133 //    p < 1 - nroot(1000000, .99999) ~= 10^-11
134 //
135 // We use the following birthday attack approximation to calculate the max
136 // number of unique names for this probability:
137 //
138 //    n(p,H) = sqrt(2*H * ln(1/(1-p)))
139 //
140 // For a 6-byte hash, H = 2^(6*8).  n(10^-11, H) ~= 75
141 //
142 // An average partition domain is likely to have less than 10 unique
143 // partition names which is far lower than 75.
144 //
145 // Note, that for 4 9s of reliability, the limit is 237 partition names per
146 // partition domain.
147 const int kPartitionNameHashBytes = 6;
148
149 // Needed for selecting all files in ObliterateOneDirectory() and
// BlockingGarbageCollect() below: files and directories on every platform,
// plus symbolic links on POSIX.
150 #if defined(OS_POSIX)
151 const int kAllFileTypes = base::FileEnumerator::FILES |
152                           base::FileEnumerator::DIRECTORIES |
153                           base::FileEnumerator::SHOW_SYM_LINKS;
154 #else
155 const int kAllFileTypes = base::FileEnumerator::FILES |
156                           base::FileEnumerator::DIRECTORIES;
157 #endif
158
159 base::FilePath GetStoragePartitionDomainPath(
160     const std::string& partition_domain) {
161   CHECK(IsStringUTF8(partition_domain));  // The domain must be valid UTF-8.
162   base::FilePath domain_path(kStoragePartitionDirname);
163   domain_path = domain_path.Append(kExtensionsDirname);
164   return domain_path.Append(base::FilePath::FromUTF8Unsafe(partition_domain));
165 }
166
167 // Helper function for doing a depth-first deletion of the data on disk.
168 // Examines paths directly in |current_dir| (no recursion) and tries to
169 // delete from disk anything that is in, or isn't a parent of something in
170 // |paths_to_keep|. Paths that need further expansion are added to
171 // |paths_to_consider|.
172 void ObliterateOneDirectory(const base::FilePath& current_dir,
173                             const std::vector<base::FilePath>& paths_to_keep,
174                             std::vector<base::FilePath>* paths_to_consider) {
175   CHECK(current_dir.IsAbsolute());
176
177   base::FileEnumerator enumerator(current_dir, false, kAllFileTypes);
178   for (base::FilePath to_delete = enumerator.Next(); !to_delete.empty();
179        to_delete = enumerator.Next()) {
180     // Enum tracking which of the 3 possible actions to take for |to_delete|.
181     enum { kSkip, kEnqueue, kDelete } action = kDelete;
182
183     for (std::vector<base::FilePath>::const_iterator to_keep =
184              paths_to_keep.begin();
185          to_keep != paths_to_keep.end();
186          ++to_keep) {
187       if (to_delete == *to_keep) {
188         action = kSkip;
189         break;
190       } else if (to_delete.IsParent(*to_keep)) {
191         // |to_delete| contains a path to keep. Add to stack for further
192         // processing.
193         action = kEnqueue;
194         break;
195       }
196     }
197
198     switch (action) {
199       case kDelete:
200         base::DeleteFile(to_delete, true);
201         break;
202
203       case kEnqueue:
204         paths_to_consider->push_back(to_delete);
205         break;
206
207       case kSkip:
208         break;
209     }
210   }
211 }
212
213 // Synchronously attempts to delete |unnormalized_root|, preserving only
214 // entries in |paths_to_keep|. If there are no entries in |paths_to_keep| on
215 // disk, then it completely removes |unnormalized_root|. All paths must be
216 // absolute paths.
//
// |unnormalized_browser_context_root| is used only as a safety check: the
// function CHECK-fails unless the root to delete lies strictly inside it.
// When at least one entry of |paths_to_keep| survives filtering,
// |on_gc_required| is posted to |closure_runner| before the partial delete
// starts; in the "delete everything" case it is never posted.
217 void BlockingObliteratePath(
218     const base::FilePath& unnormalized_browser_context_root,
219     const base::FilePath& unnormalized_root,
220     const std::vector<base::FilePath>& paths_to_keep,
221     const scoped_refptr<base::TaskRunner>& closure_runner,
222     const base::Closure& on_gc_required) {
223   // Early exit required because MakeAbsoluteFilePath() will fail on POSIX
224   // if |unnormalized_root| does not exist. This is safe because there is
225   // nothing to do in this situation anyway.
226   if (!base::PathExists(unnormalized_root)) {
227     return;
228   }
229
230   // Never try to obliterate things outside of the browser context root or the
231   // browser context root itself. Die hard.
232   base::FilePath root = base::MakeAbsoluteFilePath(unnormalized_root);
233   base::FilePath browser_context_root =
234       base::MakeAbsoluteFilePath(unnormalized_browser_context_root);
235   CHECK(!root.empty());
236   CHECK(!browser_context_root.empty());
237   CHECK(browser_context_root.IsParent(root) && browser_context_root != root);
238
239   // Reduce |paths_to_keep| set to those under the root and actually on disk.
240   std::vector<base::FilePath> valid_paths_to_keep;
241   for (std::vector<base::FilePath>::const_iterator it = paths_to_keep.begin();
242        it != paths_to_keep.end();
243        ++it) {
244     if (root.IsParent(*it) && base::PathExists(*it))
245       valid_paths_to_keep.push_back(*it);
246   }
247
248   // If none of the |paths_to_keep| are valid anymore then we just whack the
249   // root and be done with it.  Otherwise, signal garbage collection and do
250   // a best-effort delete of the on-disk structures.
251   if (valid_paths_to_keep.empty()) {
252     base::DeleteFile(root, true);
253     return;
254   }
255   closure_runner->PostTask(FROM_HERE, on_gc_required);
256
257   // Otherwise, start at the root and delete everything that is not in
258   // |valid_paths_to_keep|.
  // |paths_to_consider| is used as a stack: ObliterateOneDirectory() pushes
  // the directories that still contain a kept path and the loop pops them.
259   std::vector<base::FilePath> paths_to_consider;
260   paths_to_consider.push_back(root);
261   while(!paths_to_consider.empty()) {
262     base::FilePath path = paths_to_consider.back();
263     paths_to_consider.pop_back();
264     ObliterateOneDirectory(path, valid_paths_to_keep, &paths_to_consider);
265   }
266 }
267
268 // Deletes all entries inside the |storage_root| that are not in the
269 // |active_paths|.  Deletion is done in 2 steps:
270 //
271 //   (1) Moving all garbage collected paths into a trash directory.
272 //   (2) Asynchronously deleting the trash directory.
273 //
274 // The deletion is asynchronous because after (1) completes, calling code can
275 // safely continue to use the paths that had just been garbage collected
276 // without fear of race conditions.
277 //
278 // This code also ignores failed moves rather than attempting a smarter retry.
279 // Moves shouldn't fail here unless there is some out-of-band error (eg.,
280 // FS corruption). Retry logic is dangerous in the general case because
281 // there is not necessarily a guaranteed case where the logic may succeed.
282 //
283 // This function is still named BlockingGarbageCollect() because it does
284 // execute a few filesystem operations synchronously.
//
// |storage_root| must be absolute (CHECKed). Only its direct children are
// examined. Step (2) is posted to |file_access_runner|. If the trash
// directory cannot be created, nothing is moved or deleted.
285 void BlockingGarbageCollect(
286     const base::FilePath& storage_root,
287     const scoped_refptr<base::TaskRunner>& file_access_runner,
288     scoped_ptr<base::hash_set<base::FilePath> > active_paths) {
289   CHECK(storage_root.IsAbsolute());
290
291   base::FileEnumerator enumerator(storage_root, false, kAllFileTypes);
292   base::FilePath trash_directory;
293   if (!file_util::CreateTemporaryDirInDir(storage_root, kTrashDirname,
294                                           &trash_directory)) {
295     // Unable to continue without creating the trash directory so give up.
296     return;
297   }
298   for (base::FilePath path = enumerator.Next(); !path.empty();
299        path = enumerator.Next()) {
    // The trash directory itself lives under |storage_root|, so it has to be
    // skipped explicitly.
300     if (active_paths->find(path) == active_paths->end() &&
301         path != trash_directory) {
302       // Since |trash_directory| is unique for each run of this function there
303       // can be no collisions on the move.
304       base::Move(path, trash_directory.Append(path.BaseName()));
305     }
306   }
307
308   file_access_runner->PostTask(
309       FROM_HERE,
310       base::Bind(base::IgnoreResult(&base::DeleteFile), trash_directory, true));
311 }
312
313 }  // namespace
314
315 // static
316 base::FilePath StoragePartitionImplMap::GetStoragePartitionPath(
317     const std::string& partition_domain,
318     const std::string& partition_name) {
319   if (partition_domain.empty())
320     return base::FilePath();
321
322   base::FilePath path = GetStoragePartitionDomainPath(partition_domain);
323
324   // TODO(ajwong): Mangle in-memory into this somehow, either by putting
325   // it into the partition_name, or by manually adding another path component
326   // here.  Otherwise, it's possible to have an in-memory StoragePartition and
327   // a persistent one that return the same FilePath for GetPath().
328   if (!partition_name.empty()) {
329     // For analysis of why we can ignore collisions, see the comment above
330     // kPartitionNameHashBytes.
331     char buffer[kPartitionNameHashBytes];
332     crypto::SHA256HashString(partition_name, &buffer[0],
333                              sizeof(buffer));
334     return path.AppendASCII(base::HexEncode(buffer, sizeof(buffer)));
335   }
336
337   return path.Append(kDefaultPartitionDirname);
338 }
339
340 StoragePartitionImplMap::StoragePartitionImplMap(
341     BrowserContext* browser_context)
342     : browser_context_(browser_context),
343       resource_context_initialized_(false) {
344   // The runner is set up in the body because the expression needs two calls
345   // on the blocking pool and would be unreadable in the initializer list.
346   base::SequencedWorkerPool* const pool = BrowserThread::GetBlockingPool();
347   file_access_runner_ = pool->GetSequencedTaskRunner(pool->GetSequenceToken());
348 }
349
350 StoragePartitionImplMap::~StoragePartitionImplMap() {
351   // Deletes every StoragePartitionImpl stored as a value in |partitions_|.
352   STLDeleteContainerPairSecondPointers(partitions_.begin(), partitions_.end());
353 }
354
// Returns the StoragePartitionImpl for the given configuration, creating it
// on first request. Creation registers the partition in |partitions_|, builds
// the protocol handler map (blob, filesystem, chrome UI, any additional WebUI
// schemes, devtools), obtains the URL request contexts, and finishes with
// PostCreateInitialization(). The returned pointer stays in |partitions_|;
// the destructor deletes it.
355 StoragePartitionImpl* StoragePartitionImplMap::Get(
356     const std::string& partition_domain,
357     const std::string& partition_name,
358     bool in_memory) {
359   // Find the previously created partition if it's available.
360   StoragePartitionConfig partition_config(
361       partition_domain, partition_name, in_memory);
362
363   PartitionMap::const_iterator it = partitions_.find(partition_config);
364   if (it != partitions_.end())
365     return it->second;
366
  // Not created yet. For an empty |partition_domain| the relative path is
  // empty, so the path below is simply the browser context path.
367   base::FilePath partition_path =
368       browser_context_->GetPath().Append(
369           GetStoragePartitionPath(partition_domain, partition_name));
370   StoragePartitionImpl* partition =
371       StoragePartitionImpl::Create(browser_context_, in_memory,
372                                    partition_path);
  // Registered before the remaining setup; see the reentrancy note in
  // PostCreateInitialization().
373   partitions_[partition_config] = partition;
374
  // Protocol handlers handed to the request context factory below.
375   ChromeBlobStorageContext* blob_storage_context =
376       ChromeBlobStorageContext::GetFor(browser_context_);
377   StreamContext* stream_context = StreamContext::GetFor(browser_context_);
378   ProtocolHandlerMap protocol_handlers;
379   protocol_handlers[chrome::kBlobScheme] =
380       linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
381           new BlobProtocolHandler(blob_storage_context,
382                                   stream_context,
383                                   partition->GetFileSystemContext()));
384   protocol_handlers[chrome::kFileSystemScheme] =
385       linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
386           CreateFileSystemProtocolHandler(partition->GetFileSystemContext()));
387   protocol_handlers[chrome::kChromeUIScheme] =
388       linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
389           URLDataManagerBackend::CreateProtocolHandler(
390               browser_context_->GetResourceContext(),
391               browser_context_->IsOffTheRecord(),
392               partition->GetAppCacheService(),
393               blob_storage_context));
  // Every additional WebUI scheme reported by the browser client gets its own
  // handler, built with the same arguments as the chrome UI handler above.
394   std::vector<std::string> additional_webui_schemes;
395   GetContentClient()->browser()->GetAdditionalWebUISchemes(
396       &additional_webui_schemes);
397   for (std::vector<std::string>::const_iterator it =
398            additional_webui_schemes.begin();
399        it != additional_webui_schemes.end();
400        ++it) {
401     protocol_handlers[*it] =
402         linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
403             URLDataManagerBackend::CreateProtocolHandler(
404                 browser_context_->GetResourceContext(),
405                 browser_context_->IsOffTheRecord(),
406                 partition->GetAppCacheService(),
407                 blob_storage_context));
408   }
409   protocol_handlers[chrome::kChromeDevToolsScheme] =
410       linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
411           CreateDevToolsProtocolHandler(browser_context_->GetResourceContext(),
412                                         browser_context_->IsOffTheRecord()));
413
414   // These calls must happen after StoragePartitionImpl::Create().
  // An empty |partition_domain| selects the browser context's default request
  // contexts; any other domain gets per-partition contexts.
415   if (partition_domain.empty()) {
416     partition->SetURLRequestContext(
417         GetContentClient()->browser()->CreateRequestContext(
418             browser_context_,
419             &protocol_handlers));
420   } else {
421     partition->SetURLRequestContext(
422         GetContentClient()->browser()->CreateRequestContextForStoragePartition(
423             browser_context_, partition->GetPath(), in_memory,
424             &protocol_handlers));
425   }
426   partition->SetMediaURLRequestContext(
427       partition_domain.empty() ?
428       browser_context_->GetMediaRequestContext() :
429       browser_context_->GetMediaRequestContextForStoragePartition(
430           partition->GetPath(), in_memory));
431
432   PostCreateInitialization(partition, in_memory);
433
434   return partition;
435 }
436
// Clears the data of every live partition in |site|'s partition domain and
// posts a blocking-pool task that deletes the rest of the domain's directory
// from disk. |on_gc_required| is forwarded to BlockingObliteratePath(), which
// posts it back to the calling message loop when some paths had to be kept.
437 void StoragePartitionImplMap::AsyncObliterate(
438     const GURL& site,
439     const base::Closure& on_gc_required) {
440   // This method should avoid creating any StoragePartition (which would
441   // create more open file handles) so that it can delete as much of the
442   // data off disk as possible.
443   std::string partition_domain;
444   std::string partition_name;
445   bool in_memory = false;
446   GetContentClient()->browser()->GetStoragePartitionConfigForSite(
447       browser_context_, site, false, &partition_domain,
448       &partition_name, &in_memory);
449
450   // Find the active partitions for the domain. Because these partitions are
451   // active, it is not possible to just delete the directories that contain
452   // the backing data structures without causing the browser to crash. Instead
453   // of deleting the directory, we tell each storage context later to
454   // remove any data they have saved. This will leave the directory structure
455   // intact but it will only contain empty databases.
456   std::vector<StoragePartitionImpl*> active_partitions;
457   std::vector<base::FilePath> paths_to_keep;
458   for (PartitionMap::const_iterator it = partitions_.begin();
459        it != partitions_.end();
460        ++it) {
461     const StoragePartitionConfig& config = it->first;
462     if (config.partition_domain == partition_domain) {
463       it->second->ClearDataForUnboundedRange(
464           // All except shader cache.
465           StoragePartition::REMOVE_DATA_MASK_ALL &
466             (~StoragePartition::REMOVE_DATA_MASK_SHADER_CACHE),
467           StoragePartition::QUOTA_MANAGED_STORAGE_MASK_ALL);
      // Only persistent partitions contribute a path to keep.
468       if (!config.in_memory) {
469         paths_to_keep.push_back(it->second->GetPath());
470       }
471     }
472   }
473
474   // Start a best-effort delete of the on-disk storage excluding paths that are
475   // known to still be in use. This is to delete any previously created
476   // StoragePartition state that just happens to not have been used during this
477   // run of the browser.
478   base::FilePath domain_root = browser_context_->GetPath().Append(
479       GetStoragePartitionDomainPath(partition_domain));
480
  // The current message loop is passed along as the runner on which
  // |on_gc_required| will be posted.
481   BrowserThread::PostBlockingPoolTask(
482       FROM_HERE,
483       base::Bind(&BlockingObliteratePath, browser_context_->GetPath(),
484                  domain_root, paths_to_keep,
485                  base::MessageLoopProxy::current(), on_gc_required));
486 }
487
488 void StoragePartitionImplMap::GarbageCollect(
489     scoped_ptr<base::hash_set<base::FilePath> > active_paths,
490     const base::Closure& done) {
491   // Include all paths for current StoragePartitions in the active_paths since
492   // they cannot be deleted safely.
493   for (PartitionMap::const_iterator it = partitions_.begin();
494        it != partitions_.end();
495        ++it) {
496     const StoragePartitionConfig& config = it->first;
497     if (!config.in_memory)
498       active_paths->insert(it->second->GetPath());
499   }
500
501   // Find the directory holding the StoragePartitions and delete everything in
502   // there that isn't considered active.
503   base::FilePath storage_root = browser_context_->GetPath().Append(
504       GetStoragePartitionDomainPath(std::string()));
505   file_access_runner_->PostTaskAndReply(
506       FROM_HERE,
507       base::Bind(&BlockingGarbageCollect, storage_root,
508                  file_access_runner_,
509                  base::Passed(&active_paths)),
510       done);
511 }
512
513 void StoragePartitionImplMap::ForEach(
514     const BrowserContext::StoragePartitionCallback& callback) {
515   PartitionMap::const_iterator entry = partitions_.begin();
516   while (entry != partitions_.end()) {
517     callback.Run(entry->second);  // Hand each existing partition to |callback|.
518     ++entry;
519   }
520 }
521
// Finishes setting up a freshly created |partition|: performs the one-time
// InitializeResourceContext() call for |browser_context_| and, if the IO
// thread's message loop is valid, posts a task to the IO thread that
// initializes the partition's ChromeAppCacheService.
522 void StoragePartitionImplMap::PostCreateInitialization(
523     StoragePartitionImpl* partition,
524     bool in_memory) {
525   // TODO(ajwong): ResourceContexts no longer have any storage related state.
526   // We should move this into a place where it is called once per
527   // BrowserContext creation rather than piggybacking off the default context
528   // creation.
529   // Note: moving this into Get() before partitions_[] is set causes reentrancy.
530   if (!resource_context_initialized_) {
    // The flag is set before the call so the initialization runs only once.
531     resource_context_initialized_ = true;
532     InitializeResourceContext(browser_context_);
533   }
534
535   // Check first to avoid memory leak in unittests.
536   if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) {
    // An in-memory partition passes an empty path instead of its AppCache
    // directory.
537     BrowserThread::PostTask(
538         BrowserThread::IO, FROM_HERE,
539         base::Bind(&ChromeAppCacheService::InitializeOnIOThread,
540                    partition->GetAppCacheService(),
541                    in_memory ? base::FilePath() :
542                        partition->GetPath().Append(kAppCacheDirname),
543                    browser_context_->GetResourceContext(),
544                    make_scoped_refptr(partition->GetURLRequestContext()),
545                    make_scoped_refptr(
546                        browser_context_->GetSpecialStoragePolicy())));
547
548     // We do not call InitializeURLRequestContext() for media contexts because,
549     // other than the HTTP cache, the media contexts share the same backing
550     // objects as their associated "normal" request context.  Thus, the previous
551     // call serves to initialize the media request context for this storage
552     // partition as well.
553   }
554 }
555
556 }  // namespace content