03fb3120392d843bb4954872f52e71c471b4f06f
[platform/framework/web/crosswalk.git] / src / content / browser / storage_partition_impl_map.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/storage_partition_impl_map.h"
6
7 #include "base/bind.h"
8 #include "base/callback.h"
9 #include "base/file_util.h"
10 #include "base/files/file_enumerator.h"
11 #include "base/files/file_path.h"
12 #include "base/stl_util.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/threading/sequenced_worker_pool.h"
17 #include "content/browser/appcache/chrome_appcache_service.h"
18 #include "content/browser/fileapi/browser_file_system_helper.h"
19 #include "content/browser/fileapi/chrome_blob_storage_context.h"
20 #include "content/browser/loader/resource_request_info_impl.h"
21 #include "content/browser/resource_context_impl.h"
22 #include "content/browser/service_worker/service_worker_request_handler.h"
23 #include "content/browser/storage_partition_impl.h"
24 #include "content/browser/streams/stream.h"
25 #include "content/browser/streams/stream_context.h"
26 #include "content/browser/streams/stream_registry.h"
27 #include "content/browser/streams/stream_url_request_job.h"
28 #include "content/browser/webui/url_data_manager_backend.h"
29 #include "content/public/browser/browser_context.h"
30 #include "content/public/browser/browser_thread.h"
31 #include "content/public/browser/content_browser_client.h"
32 #include "content/public/browser/storage_partition.h"
33 #include "content/public/common/content_constants.h"
34 #include "content/public/common/url_constants.h"
35 #include "crypto/sha2.h"
36 #include "net/url_request/url_request_context.h"
37 #include "net/url_request/url_request_context_getter.h"
38 #include "webkit/browser/blob/blob_storage_context.h"
39 #include "webkit/browser/blob/blob_url_request_job_factory.h"
40 #include "webkit/browser/fileapi/file_system_url_request_job_factory.h"
41 #include "webkit/common/blob/blob_data.h"
42
43 using appcache::AppCacheService;
44 using fileapi::FileSystemContext;
45 using webkit_blob::BlobStorageContext;
46
47 namespace content {
48
49 namespace {
50
51 // A derivative that knows about Streams too.
52 class BlobProtocolHandler : public net::URLRequestJobFactory::ProtocolHandler {
53  public:
54   BlobProtocolHandler(ChromeBlobStorageContext* blob_storage_context,
55                       StreamContext* stream_context,
56                       fileapi::FileSystemContext* file_system_context)
57       : blob_storage_context_(blob_storage_context),
58         stream_context_(stream_context),
59         file_system_context_(file_system_context) {
60   }
61
62   virtual ~BlobProtocolHandler() {
63   }
64
65   virtual net::URLRequestJob* MaybeCreateJob(
66       net::URLRequest* request,
67       net::NetworkDelegate* network_delegate) const OVERRIDE {
68     scoped_refptr<Stream> stream =
69         stream_context_->registry()->GetStream(request->url());
70     if (stream.get())
71       return new StreamURLRequestJob(request, network_delegate, stream);
72
73     if (!blob_protocol_handler_) {
74       // Construction is deferred because 'this' is constructed on
75       // the main thread but we want blob_protocol_handler_ constructed
76       // on the IO thread.
77       blob_protocol_handler_.reset(
78           new webkit_blob::BlobProtocolHandler(
79               blob_storage_context_->context(),
80               file_system_context_,
81               BrowserThread::GetMessageLoopProxyForThread(
82                   BrowserThread::FILE).get()));
83     }
84     return blob_protocol_handler_->MaybeCreateJob(request, network_delegate);
85   }
86
87  private:
88   const scoped_refptr<ChromeBlobStorageContext> blob_storage_context_;
89   const scoped_refptr<StreamContext> stream_context_;
90   const scoped_refptr<fileapi::FileSystemContext> file_system_context_;
91   mutable scoped_ptr<webkit_blob::BlobProtocolHandler> blob_protocol_handler_;
92   DISALLOW_COPY_AND_ASSIGN(BlobProtocolHandler);
93 };
94
// These constants are used to create the directory structure under the profile
// where renderers with a non-default storage partition keep their persistent
// state. This will contain a set of directories that partially mirror the
// directory structure of BrowserContext::GetPath().
//
// The kStoragePartitionDirname contains an extensions directory which is
// further partitioned by extension id, followed by another level of directories
// for the "default" extension storage partition and one directory for each
// persistent partition used by a webview tag. Example:
//
//   Storage/ext/ABCDEF/def
//   Storage/ext/ABCDEF/hash(partition name)
//
// The code in GetStoragePartitionPath() constructs these path names.
//
// TODO(nasko): Move extension related path code out of content.

// First path component of every partition domain path.
const base::FilePath::CharType kStoragePartitionDirname[] =
    FILE_PATH_LITERAL("Storage");
// Component appended directly after kStoragePartitionDirname.
const base::FilePath::CharType kExtensionsDirname[] =
    FILE_PATH_LITERAL("ext");
// Leaf component used when the partition name is empty.
const base::FilePath::CharType kDefaultPartitionDirname[] =
    FILE_PATH_LITERAL("def");
// Prefix passed to CreateTemporaryDirInDir() when garbage collection creates
// its trash directory.
const base::FilePath::CharType kTrashDirname[] =
    FILE_PATH_LITERAL("trash");
119
// Because partition names are user specified, they can be arbitrarily long
// which makes them unsuitable for path names. We use a truncation of a
// SHA256 hash to perform a deterministic shortening of the string. The
// kPartitionNameHashBytes constant controls the length of the truncation.
// We use 6 bytes, which gives us 99.999% reliability against collisions over
// 1 million partition domains.
//
// Analysis:
// We assume that all partition names within one partition domain are
// controlled by the same entity. Thus there is no chance for adversarial
// attack and all we care about is accidental collision. To get 5 9s over
// 1 million domains, we need the probability of a collision in any one domain
// to be
//
//    p < nroot(1000000, .99999) ~= 10^-11
//
// We use the following birthday attack approximation to calculate the max
// number of unique names for this probability:
//
//    n(p,H) = sqrt(2*H * ln(1/(1-p)))
//
// For a 6-byte hash, H = 2^(6*8).  n(10^-11, H) ~= 75
//
// An average partition domain is likely to have less than 10 unique
// partition names which is far lower than 75.
//
// Note, that for 4 9s of reliability, the limit is 237 partition names per
// partition domain.
const int kPartitionNameHashBytes = 6;
149
// FileEnumerator type mask used to select every entry of a directory in
// ObliterateOneDirectory() and BlockingGarbageCollect() below. Symbolic links
// are only included in the mask when OS_POSIX is defined.
#if defined(OS_POSIX)
const int kAllFileTypes = base::FileEnumerator::FILES |
                          base::FileEnumerator::DIRECTORIES |
                          base::FileEnumerator::SHOW_SYM_LINKS;
#else
const int kAllFileTypes = base::FileEnumerator::FILES |
                          base::FileEnumerator::DIRECTORIES;
#endif
159
160 base::FilePath GetStoragePartitionDomainPath(
161     const std::string& partition_domain) {
162   CHECK(IsStringUTF8(partition_domain));
163
164   return base::FilePath(kStoragePartitionDirname).Append(kExtensionsDirname)
165       .Append(base::FilePath::FromUTF8Unsafe(partition_domain));
166 }
167
168 // Helper function for doing a depth-first deletion of the data on disk.
169 // Examines paths directly in |current_dir| (no recursion) and tries to
170 // delete from disk anything that is in, or isn't a parent of something in
171 // |paths_to_keep|. Paths that need further expansion are added to
172 // |paths_to_consider|.
173 void ObliterateOneDirectory(const base::FilePath& current_dir,
174                             const std::vector<base::FilePath>& paths_to_keep,
175                             std::vector<base::FilePath>* paths_to_consider) {
176   CHECK(current_dir.IsAbsolute());
177
178   base::FileEnumerator enumerator(current_dir, false, kAllFileTypes);
179   for (base::FilePath to_delete = enumerator.Next(); !to_delete.empty();
180        to_delete = enumerator.Next()) {
181     // Enum tracking which of the 3 possible actions to take for |to_delete|.
182     enum { kSkip, kEnqueue, kDelete } action = kDelete;
183
184     for (std::vector<base::FilePath>::const_iterator to_keep =
185              paths_to_keep.begin();
186          to_keep != paths_to_keep.end();
187          ++to_keep) {
188       if (to_delete == *to_keep) {
189         action = kSkip;
190         break;
191       } else if (to_delete.IsParent(*to_keep)) {
192         // |to_delete| contains a path to keep. Add to stack for further
193         // processing.
194         action = kEnqueue;
195         break;
196       }
197     }
198
199     switch (action) {
200       case kDelete:
201         base::DeleteFile(to_delete, true);
202         break;
203
204       case kEnqueue:
205         paths_to_consider->push_back(to_delete);
206         break;
207
208       case kSkip:
209         break;
210     }
211   }
212 }
213
214 // Synchronously attempts to delete |unnormalized_root|, preserving only
215 // entries in |paths_to_keep|. If there are no entries in |paths_to_keep| on
216 // disk, then it completely removes |unnormalized_root|. All paths must be
217 // absolute paths.
218 void BlockingObliteratePath(
219     const base::FilePath& unnormalized_browser_context_root,
220     const base::FilePath& unnormalized_root,
221     const std::vector<base::FilePath>& paths_to_keep,
222     const scoped_refptr<base::TaskRunner>& closure_runner,
223     const base::Closure& on_gc_required) {
224   // Early exit required because MakeAbsoluteFilePath() will fail on POSIX
225   // if |unnormalized_root| does not exist. This is safe because there is
226   // nothing to do in this situation anwyays.
227   if (!base::PathExists(unnormalized_root)) {
228     return;
229   }
230
231   // Never try to obliterate things outside of the browser context root or the
232   // browser context root itself. Die hard.
233   base::FilePath root = base::MakeAbsoluteFilePath(unnormalized_root);
234   base::FilePath browser_context_root =
235       base::MakeAbsoluteFilePath(unnormalized_browser_context_root);
236   CHECK(!root.empty());
237   CHECK(!browser_context_root.empty());
238   CHECK(browser_context_root.IsParent(root) && browser_context_root != root);
239
240   // Reduce |paths_to_keep| set to those under the root and actually on disk.
241   std::vector<base::FilePath> valid_paths_to_keep;
242   for (std::vector<base::FilePath>::const_iterator it = paths_to_keep.begin();
243        it != paths_to_keep.end();
244        ++it) {
245     if (root.IsParent(*it) && base::PathExists(*it))
246       valid_paths_to_keep.push_back(*it);
247   }
248
249   // If none of the |paths_to_keep| are valid anymore then we just whack the
250   // root and be done with it.  Otherwise, signal garbage collection and do
251   // a best-effort delete of the on-disk structures.
252   if (valid_paths_to_keep.empty()) {
253     base::DeleteFile(root, true);
254     return;
255   }
256   closure_runner->PostTask(FROM_HERE, on_gc_required);
257
258   // Otherwise, start at the root and delete everything that is not in
259   // |valid_paths_to_keep|.
260   std::vector<base::FilePath> paths_to_consider;
261   paths_to_consider.push_back(root);
262   while(!paths_to_consider.empty()) {
263     base::FilePath path = paths_to_consider.back();
264     paths_to_consider.pop_back();
265     ObliterateOneDirectory(path, valid_paths_to_keep, &paths_to_consider);
266   }
267 }
268
269 // Deletes all entries inside the |storage_root| that are not in the
270 // |active_paths|.  Deletion is done in 2 steps:
271 //
272 //   (1) Moving all garbage collected paths into a trash directory.
273 //   (2) Asynchronously deleting the trash directory.
274 //
275 // The deletion is asynchronous because after (1) completes, calling code can
276 // safely continue to use the paths that had just been garbage collected
277 // without fear of race conditions.
278 //
279 // This code also ignores failed moves rather than attempting a smarter retry.
280 // Moves shouldn't fail here unless there is some out-of-band error (eg.,
281 // FS corruption). Retry logic is dangerous in the general case because
282 // there is not necessarily a guaranteed case where the logic may succeed.
283 //
284 // This function is still named BlockingGarbageCollect() because it does
285 // execute a few filesystem operations synchronously.
286 void BlockingGarbageCollect(
287     const base::FilePath& storage_root,
288     const scoped_refptr<base::TaskRunner>& file_access_runner,
289     scoped_ptr<base::hash_set<base::FilePath> > active_paths) {
290   CHECK(storage_root.IsAbsolute());
291
292   base::FileEnumerator enumerator(storage_root, false, kAllFileTypes);
293   base::FilePath trash_directory;
294   if (!base::CreateTemporaryDirInDir(storage_root, kTrashDirname,
295                                      &trash_directory)) {
296     // Unable to continue without creating the trash directory so give up.
297     return;
298   }
299   for (base::FilePath path = enumerator.Next(); !path.empty();
300        path = enumerator.Next()) {
301     if (active_paths->find(path) == active_paths->end() &&
302         path != trash_directory) {
303       // Since |trash_directory| is unique for each run of this function there
304       // can be no colllisions on the move.
305       base::Move(path, trash_directory.Append(path.BaseName()));
306     }
307   }
308
309   file_access_runner->PostTask(
310       FROM_HERE,
311       base::Bind(base::IgnoreResult(&base::DeleteFile), trash_directory, true));
312 }
313
314 }  // namespace
315
316 // static
317 base::FilePath StoragePartitionImplMap::GetStoragePartitionPath(
318     const std::string& partition_domain,
319     const std::string& partition_name) {
320   if (partition_domain.empty())
321     return base::FilePath();
322
323   base::FilePath path = GetStoragePartitionDomainPath(partition_domain);
324
325   // TODO(ajwong): Mangle in-memory into this somehow, either by putting
326   // it into the partition_name, or by manually adding another path component
327   // here.  Otherwise, it's possible to have an in-memory StoragePartition and
328   // a persistent one that return the same FilePath for GetPath().
329   if (!partition_name.empty()) {
330     // For analysis of why we can ignore collisions, see the comment above
331     // kPartitionNameHashBytes.
332     char buffer[kPartitionNameHashBytes];
333     crypto::SHA256HashString(partition_name, &buffer[0],
334                              sizeof(buffer));
335     return path.AppendASCII(base::HexEncode(buffer, sizeof(buffer)));
336   }
337
338   return path.Append(kDefaultPartitionDirname);
339 }
340
341 StoragePartitionImplMap::StoragePartitionImplMap(
342     BrowserContext* browser_context)
343     : browser_context_(browser_context),
344       resource_context_initialized_(false) {
345   // Doing here instead of initializer list cause it's just too ugly to read.
346   base::SequencedWorkerPool* blocking_pool = BrowserThread::GetBlockingPool();
347   file_access_runner_ =
348       blocking_pool->GetSequencedTaskRunner(blocking_pool->GetSequenceToken());
349 }
350
351 StoragePartitionImplMap::~StoragePartitionImplMap() {
352   STLDeleteContainerPairSecondPointers(partitions_.begin(),
353                                        partitions_.end());
354 }
355
356 StoragePartitionImpl* StoragePartitionImplMap::Get(
357     const std::string& partition_domain,
358     const std::string& partition_name,
359     bool in_memory) {
360   // Find the previously created partition if it's available.
361   StoragePartitionConfig partition_config(
362       partition_domain, partition_name, in_memory);
363
364   PartitionMap::const_iterator it = partitions_.find(partition_config);
365   if (it != partitions_.end())
366     return it->second;
367
368   base::FilePath partition_path =
369       browser_context_->GetPath().Append(
370           GetStoragePartitionPath(partition_domain, partition_name));
371   StoragePartitionImpl* partition =
372       StoragePartitionImpl::Create(browser_context_, in_memory,
373                                    partition_path);
374   partitions_[partition_config] = partition;
375
376   ChromeBlobStorageContext* blob_storage_context =
377       ChromeBlobStorageContext::GetFor(browser_context_);
378   StreamContext* stream_context = StreamContext::GetFor(browser_context_);
379   ProtocolHandlerMap protocol_handlers;
380   protocol_handlers[kBlobScheme] =
381       linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
382           new BlobProtocolHandler(blob_storage_context,
383                                   stream_context,
384                                   partition->GetFileSystemContext()));
385   protocol_handlers[kFileSystemScheme] =
386       linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
387           CreateFileSystemProtocolHandler(partition_domain,
388                                           partition->GetFileSystemContext()));
389   protocol_handlers[kChromeUIScheme] =
390       linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
391           URLDataManagerBackend::CreateProtocolHandler(
392               browser_context_->GetResourceContext(),
393               browser_context_->IsOffTheRecord(),
394               partition->GetAppCacheService(),
395               blob_storage_context));
396   std::vector<std::string> additional_webui_schemes;
397   GetContentClient()->browser()->GetAdditionalWebUISchemes(
398       &additional_webui_schemes);
399   for (std::vector<std::string>::const_iterator it =
400            additional_webui_schemes.begin();
401        it != additional_webui_schemes.end();
402        ++it) {
403     protocol_handlers[*it] =
404         linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
405             URLDataManagerBackend::CreateProtocolHandler(
406                 browser_context_->GetResourceContext(),
407                 browser_context_->IsOffTheRecord(),
408                 partition->GetAppCacheService(),
409                 blob_storage_context));
410   }
411   protocol_handlers[kChromeDevToolsScheme] =
412       linked_ptr<net::URLRequestJobFactory::ProtocolHandler>(
413           CreateDevToolsProtocolHandler(browser_context_->GetResourceContext(),
414                                         browser_context_->IsOffTheRecord()));
415
416   ProtocolHandlerScopedVector protocol_interceptors;
417   protocol_interceptors.push_back(
418       ServiceWorkerRequestHandler::CreateInterceptor().release());
419
420   // These calls must happen after StoragePartitionImpl::Create().
421   if (partition_domain.empty()) {
422     partition->SetURLRequestContext(
423         GetContentClient()->browser()->CreateRequestContext(
424             browser_context_,
425             &protocol_handlers,
426             protocol_interceptors.Pass()));
427   } else {
428     partition->SetURLRequestContext(
429         GetContentClient()->browser()->CreateRequestContextForStoragePartition(
430             browser_context_,
431             partition->GetPath(),
432             in_memory,
433             &protocol_handlers,
434             protocol_interceptors.Pass()));
435   }
436   partition->SetMediaURLRequestContext(
437       partition_domain.empty() ?
438       browser_context_->GetMediaRequestContext() :
439       browser_context_->GetMediaRequestContextForStoragePartition(
440           partition->GetPath(), in_memory));
441
442   PostCreateInitialization(partition, in_memory);
443
444   return partition;
445 }
446
447 void StoragePartitionImplMap::AsyncObliterate(
448     const GURL& site,
449     const base::Closure& on_gc_required) {
450   // This method should avoid creating any StoragePartition (which would
451   // create more open file handles) so that it can delete as much of the
452   // data off disk as possible.
453   std::string partition_domain;
454   std::string partition_name;
455   bool in_memory = false;
456   GetContentClient()->browser()->GetStoragePartitionConfigForSite(
457       browser_context_, site, false, &partition_domain,
458       &partition_name, &in_memory);
459
460   // Find the active partitions for the domain. Because these partitions are
461   // active, it is not possible to just delete the directories that contain
462   // the backing data structures without causing the browser to crash. Instead,
463   // of deleteing the directory, we tell each storage context later to
464   // remove any data they have saved. This will leave the directory structure
465   // intact but it will only contain empty databases.
466   std::vector<StoragePartitionImpl*> active_partitions;
467   std::vector<base::FilePath> paths_to_keep;
468   for (PartitionMap::const_iterator it = partitions_.begin();
469        it != partitions_.end();
470        ++it) {
471     const StoragePartitionConfig& config = it->first;
472     if (config.partition_domain == partition_domain) {
473       it->second->ClearData(
474           // All except shader cache.
475           StoragePartition::REMOVE_DATA_MASK_ALL &
476             (~StoragePartition::REMOVE_DATA_MASK_SHADER_CACHE),
477           StoragePartition::QUOTA_MANAGED_STORAGE_MASK_ALL,
478           GURL(),
479           StoragePartition::OriginMatcherFunction(),
480           base::Time(), base::Time::Max(),
481           base::Bind(&base::DoNothing));
482       if (!config.in_memory) {
483         paths_to_keep.push_back(it->second->GetPath());
484       }
485     }
486   }
487
488   // Start a best-effort delete of the on-disk storage excluding paths that are
489   // known to still be in use. This is to delete any previously created
490   // StoragePartition state that just happens to not have been used during this
491   // run of the browser.
492   base::FilePath domain_root = browser_context_->GetPath().Append(
493       GetStoragePartitionDomainPath(partition_domain));
494
495   BrowserThread::PostBlockingPoolTask(
496       FROM_HERE,
497       base::Bind(&BlockingObliteratePath, browser_context_->GetPath(),
498                  domain_root, paths_to_keep,
499                  base::MessageLoopProxy::current(), on_gc_required));
500 }
501
502 void StoragePartitionImplMap::GarbageCollect(
503     scoped_ptr<base::hash_set<base::FilePath> > active_paths,
504     const base::Closure& done) {
505   // Include all paths for current StoragePartitions in the active_paths since
506   // they cannot be deleted safely.
507   for (PartitionMap::const_iterator it = partitions_.begin();
508        it != partitions_.end();
509        ++it) {
510     const StoragePartitionConfig& config = it->first;
511     if (!config.in_memory)
512       active_paths->insert(it->second->GetPath());
513   }
514
515   // Find the directory holding the StoragePartitions and delete everything in
516   // there that isn't considered active.
517   base::FilePath storage_root = browser_context_->GetPath().Append(
518       GetStoragePartitionDomainPath(std::string()));
519   file_access_runner_->PostTaskAndReply(
520       FROM_HERE,
521       base::Bind(&BlockingGarbageCollect, storage_root,
522                  file_access_runner_,
523                  base::Passed(&active_paths)),
524       done);
525 }
526
527 void StoragePartitionImplMap::ForEach(
528     const BrowserContext::StoragePartitionCallback& callback) {
529   for (PartitionMap::const_iterator it = partitions_.begin();
530        it != partitions_.end();
531        ++it) {
532     callback.Run(it->second);
533   }
534 }
535
536 void StoragePartitionImplMap::PostCreateInitialization(
537     StoragePartitionImpl* partition,
538     bool in_memory) {
539   // TODO(ajwong): ResourceContexts no longer have any storage related state.
540   // We should move this into a place where it is called once per
541   // BrowserContext creation rather than piggybacking off the default context
542   // creation.
543   // Note: moving this into Get() before partitions_[] is set causes reentrency.
544   if (!resource_context_initialized_) {
545     resource_context_initialized_ = true;
546     InitializeResourceContext(browser_context_);
547   }
548
549   // Check first to avoid memory leak in unittests.
550   if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) {
551     BrowserThread::PostTask(
552         BrowserThread::IO, FROM_HERE,
553         base::Bind(&ChromeAppCacheService::InitializeOnIOThread,
554                    partition->GetAppCacheService(),
555                    in_memory ? base::FilePath() :
556                        partition->GetPath().Append(kAppCacheDirname),
557                    browser_context_->GetResourceContext(),
558                    make_scoped_refptr(partition->GetURLRequestContext()),
559                    make_scoped_refptr(
560                        browser_context_->GetSpecialStoragePolicy())));
561
562     // We do not call InitializeURLRequestContext() for media contexts because,
563     // other than the HTTP cache, the media contexts share the same backing
564     // objects as their associated "normal" request context.  Thus, the previous
565     // call serves to initialize the media request context for this storage
566     // partition as well.
567   }
568 }
569
570 }  // namespace content