// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "extensions/browser/content_hash_fetcher.h"

#include <algorithm>
#include <set>
#include <string>
#include <vector>

#include "base/base64.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/json/json_reader.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram.h"
#include "base/synchronization/lock.h"
#include "base/task_runner_util.h"
#include "base/timer/elapsed_timer.h"
#include "base/version.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/browser_thread.h"
#include "crypto/sha2.h"
#include "extensions/browser/computed_hashes.h"
#include "extensions/browser/content_hash_tree.h"
#include "extensions/browser/content_verifier_delegate.h"
#include "extensions/browser/verified_contents.h"
#include "extensions/common/constants.h"
#include "extensions/common/extension.h"
#include "extensions/common/file_util.h"
#include "net/base/load_flags.h"
#include "net/url_request/url_fetcher.h"
#include "net/url_request/url_fetcher_delegate.h"
#include "net/url_request/url_request_status.h"

namespace {

typedef std::set<base::FilePath> SortedFilePathSet;

}  // namespace
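
// For orientation, the on-disk layout this file manages looks roughly like
// the following (a sketch; the exact paths come from the helpers in
// extensions/common/file_util.h):
//
//   <extension_path>/_metadata/verified_contents.json  - signed tree-hash
//       roots fetched from the webstore.
//   <extension_path>/_metadata/computed_hashes.json    - block-level hashes
//       computed locally by ContentHashFetcherJob::CreateHashes().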

namespace extensions {

// This class takes care of doing the disk and network I/O work to ensure we
// have both verified_contents.json files from the webstore and
// computed_hashes.json files computed over the files in an extension's
// directory.
class ContentHashFetcherJob
    : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
      public net::URLFetcherDelegate {
 public:
  typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
  ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
                        const ContentVerifierKey& key,
                        const std::string& extension_id,
                        const base::FilePath& extension_path,
                        const GURL& fetch_url,
                        bool force,
                        const CompletionCallback& callback);

  void Start();

  // Cancels this job, which will attempt to stop I/O operations sooner than
  // just waiting for the entire job to complete. Safe to call from any thread.
  void Cancel();

  // Checks whether this job has been cancelled. Safe to call from any thread.
  bool IsCancelled();

  // Returns whether this job was successful (we have both verified contents
  // and computed hashes). Even if the job was a success, there might have been
  // files that were found to have contents not matching expectations; these
  // are available by calling hash_mismatch_paths().
  bool success() { return success_; }

  bool force() { return force_; }

  const std::string& extension_id() { return extension_id_; }

  // Returns the set of paths that had a hash mismatch.
  const std::set<base::FilePath>& hash_mismatch_paths() {
    return hash_mismatch_paths_;
  }

 private:
  friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
  ~ContentHashFetcherJob() override;
  // Tries to load a verified_contents.json file at |path|. On successfully
  // reading and validating the file, the verified_contents_ member variable
  // will be set and this function will return true. If the file does not
  // exist, or exists but is invalid, it will return false. Any invalid file
  // is also deleted from disk.
  bool LoadVerifiedContents(const base::FilePath& path);

  // Callback for when we're done doing file I/O to see if we already have
  // a verified contents file. If we don't, this will kick off a network
  // request to get one.
  void DoneCheckingForVerifiedContents(bool found);

  // URLFetcherDelegate interface
  void OnURLFetchComplete(const net::URLFetcher* source) override;

  // Callback for when we're done ensuring we have verified contents, and are
  // ready to move on to MaybeCreateHashes.
  void DoneFetchingVerifiedContents(bool success);

  // Callback for the job to write the verified contents to the filesystem.
  void OnVerifiedContentsWritten(size_t expected_size, int write_result);

  // The verified contents file from the webstore only contains the treehash
  // root hash, but for performance we want to cache the individual block level
  // hashes. This function will create that cache with block-level hashes for
  // each file in the extension if needed (the treehash root hash for each of
  // these should equal what is in the verified contents file from the
  // webstore).
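  //
  // In other words, for every file listed there the invariant we cache and
  // re-check is roughly (a sketch, using the same helpers CreateHashes()
  // calls below):
  //
  //   std::vector<std::string> hashes;
  //   ComputedHashes::ComputeHashesForContent(contents, block_size_, &hashes);
  //   ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length)
  //       == <tree hash root from verified_contents.json>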
  void MaybeCreateHashes();

  // Computes hashes for all files in |extension_path_|, and uses a
  // ComputedHashes::Writer to write that information into
  // |hashes_file|. Returns true on success.
  bool CreateHashes(const base::FilePath& hashes_file);

  // Will call the callback, if we haven't been cancelled.
  void DispatchCallback();

  net::URLRequestContextGetter* request_context_;
  std::string extension_id_;
  base::FilePath extension_path_;

  // The URL we'll need to use to fetch a verified_contents.json file.
  GURL fetch_url_;

  bool force_;

  CompletionCallback callback_;
  content::BrowserThread::ID creation_thread_;

  // Used for fetching content signatures.
  scoped_ptr<net::URLFetcher> url_fetcher_;

  // The key used to validate verified_contents.json.
  ContentVerifierKey key_;

  // The parsed contents of the verified_contents.json file, either read from
  // disk or fetched from the network and then written to disk.
  scoped_ptr<VerifiedContents> verified_contents_;

  // Whether this job succeeded.
  bool success_;

  // Paths that were found to have a mismatching hash.
  std::set<base::FilePath> hash_mismatch_paths_;

  // The block size to use for hashing.
  int block_size_;

  // Note: this may be accessed from multiple threads, so all access should
  // be protected by |cancelled_lock_|.
  bool cancelled_;

  // A lock for synchronizing access to |cancelled_|.
  base::Lock cancelled_lock_;

  DISALLOW_COPY_AND_ASSIGN(ContentHashFetcherJob);
};
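
// Typical lifecycle (see ContentHashFetcher::DoFetch below): create a job
// with a completion callback, call Start(), and the job alternates between
// the blocking pool (file I/O, hashing) and its creation thread, invoking the
// callback at the end unless Cancel() was called first. A sketch:
//
//   scoped_refptr<ContentHashFetcherJob> job = new ContentHashFetcherJob(
//       request_context, key, id, path, fetch_url, force, callback);
//   job->Start();  // |callback| runs later; query job->success() there.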

ContentHashFetcherJob::ContentHashFetcherJob(
    net::URLRequestContextGetter* request_context,
    const ContentVerifierKey& key,
    const std::string& extension_id,
    const base::FilePath& extension_path,
    const GURL& fetch_url,
    bool force,
    const CompletionCallback& callback)
    : request_context_(request_context),
      extension_id_(extension_id),
      extension_path_(extension_path),
      fetch_url_(fetch_url),
      force_(force),
      callback_(callback),
      key_(key),
      success_(false),
      // TODO(asargent) - use the value from verified_contents.json for each
      // file, instead of using a constant.
      block_size_(4096),
      cancelled_(false) {
  bool got_id =
      content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
  DCHECK(got_id);
}

void ContentHashFetcherJob::Start() {
  base::FilePath verified_contents_path =
      file_util::GetVerifiedContentsPath(extension_path_);
  base::PostTaskAndReplyWithResult(
      content::BrowserThread::GetBlockingPool(),
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::LoadVerifiedContents,
                 this,
                 verified_contents_path),
      base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
                 this));
}

void ContentHashFetcherJob::Cancel() {
  base::AutoLock autolock(cancelled_lock_);
  cancelled_ = true;
}

bool ContentHashFetcherJob::IsCancelled() {
  base::AutoLock autolock(cancelled_lock_);
  return cancelled_;
}

ContentHashFetcherJob::~ContentHashFetcherJob() {
}

bool ContentHashFetcherJob::LoadVerifiedContents(const base::FilePath& path) {
  if (!base::PathExists(path))
    return false;
  verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
  if (!verified_contents_->InitFrom(path, false)) {
    verified_contents_.reset();
    if (!base::DeleteFile(path, false))
      LOG(WARNING) << "Failed to delete " << path.value();
    return false;
  }
  return true;
}

void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
  if (IsCancelled())
    return;
  if (found) {
    VLOG(1) << "Found verified contents for " << extension_id_;
    DoneFetchingVerifiedContents(true);
  } else {
    VLOG(1) << "Missing verified contents for " << extension_id_
            << ", fetching...";
    url_fetcher_.reset(
        net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this));
    url_fetcher_->SetRequestContext(request_context_);
    url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
                               net::LOAD_DO_NOT_SAVE_COOKIES |
                               net::LOAD_DISABLE_CACHE);
    url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
    url_fetcher_->Start();
  }
}

// Helper function to let us pass ownership of a string via base::Bind with the
// contents to be written into a file. Also ensures that the directory for
// |path| exists, creating it if needed.
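// Callers bind it with base::Passed() so the string's ownership moves into
// the callback (see OnURLFetchComplete below):
//
//   base::Bind(&WriteFileHelper, destination, base::Passed(&response))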
static int WriteFileHelper(const base::FilePath& path,
                           scoped_ptr<std::string> content) {
  base::FilePath dir = path.DirName();
  if (!base::CreateDirectoryAndGetError(dir, NULL))
    return -1;
  // Returns the number of bytes written, or -1 on error, so that
  // OnVerifiedContentsWritten can compare the result against the expected
  // size.
  return base::WriteFile(path, content->data(), content->size());
}

void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
  VLOG(1) << "URLFetchComplete for " << extension_id_
          << " is_success:" << url_fetcher_->GetStatus().is_success() << " "
          << fetch_url_.possibly_invalid_spec();
  if (IsCancelled())
    return;
  scoped_ptr<std::string> response(new std::string);
  if (!url_fetcher_->GetStatus().is_success() ||
      !url_fetcher_->GetResponseAsString(response.get())) {
    DoneFetchingVerifiedContents(false);
    return;
  }

  // Parse the response to make sure it is valid JSON (on staging it can
  // sometimes be an HTML login redirect, an XML file, etc. if you aren't
  // logged in with the right cookies). TODO(asargent) - It would be a nice
  // enhancement to move this parsing into a sandboxed helper
  // (crbug.com/372878).
  scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
  if (parsed) {
    VLOG(1) << "JSON parsed ok for " << extension_id_;

    parsed.reset();  // no longer needed
    base::FilePath destination =
        file_util::GetVerifiedContentsPath(extension_path_);
    size_t size = response->size();
    base::PostTaskAndReplyWithResult(
        content::BrowserThread::GetBlockingPool(),
        FROM_HERE,
        base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
        base::Bind(
            &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
  } else {
    DoneFetchingVerifiedContents(false);
  }
}

void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
                                                      int write_result) {
  bool success =
      (write_result >= 0 && static_cast<size_t>(write_result) == expected_size);
  DoneFetchingVerifiedContents(success);
}

void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
  if (IsCancelled())
    return;

  if (!success) {
    DispatchCallback();
    return;
  }

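  // Note: using a named sequence token here serializes hash computation for
  // all jobs on a single sequence of the blocking pool, rather than letting
  // the pool run them in parallel.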
  content::BrowserThread::PostBlockingPoolSequencedTask(
      "ContentHashFetcher",
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
}

void ContentHashFetcherJob::MaybeCreateHashes() {
  if (IsCancelled())
    return;
  base::FilePath hashes_file =
      file_util::GetComputedHashesPath(extension_path_);

  if (!force_ && base::PathExists(hashes_file)) {
    success_ = true;
  } else {
    if (force_)
      base::DeleteFile(hashes_file, false /* recursive */);
    success_ = CreateHashes(hashes_file);
  }

  content::BrowserThread::PostTask(
      creation_thread_,
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
}

bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
  base::ElapsedTimer timer;
  if (IsCancelled())
    return false;
  // Make sure the directory exists.
  if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
    return false;

  if (!verified_contents_.get()) {
    base::FilePath verified_contents_path =
        file_util::GetVerifiedContentsPath(extension_path_);
    verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
    if (!verified_contents_->InitFrom(verified_contents_path, false)) {
      // Loading failed; clear the half-initialized object so we don't
      // dereference bogus verified contents below.
      verified_contents_.reset();
      return false;
    }
  }

  base::FileEnumerator enumerator(extension_path_,
                                  true, /* recursive */
                                  base::FileEnumerator::FILES);
  // First discover all the file paths and put them in a sorted set.
  SortedFilePathSet paths;
  for (;;) {
    if (IsCancelled())
      return false;

    base::FilePath full_path = enumerator.Next();
    if (full_path.empty())
      break;
    paths.insert(full_path);
  }

  // Now iterate over all the paths in sorted order and compute the block
  // hashes for each one.
  ComputedHashes::Writer writer;
  for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
    if (IsCancelled())
      return false;
    const base::FilePath& full_path = *i;
    base::FilePath relative_path;
    extension_path_.AppendRelativePath(full_path, &relative_path);
    relative_path = relative_path.NormalizePathSeparatorsTo('/');

    if (!verified_contents_->HasTreeHashRoot(relative_path))
      continue;

    std::string contents;
    if (!base::ReadFileToString(full_path, &contents)) {
      LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
      continue;
    }

    // Hash each successive |block_size_| bytes of the file's contents.
    std::vector<std::string> hashes;
    ComputedHashes::ComputeHashesForContent(contents, block_size_, &hashes);
    std::string root =
        ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
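    // With the default 4096-byte blocks and 32-byte SHA-256 digests the
    // branch factor is 4096 / 32 = 128; e.g. a 1 MB file produces 256 block
    // hashes, which the tree reduces to a single root in two levels.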
    if (!verified_contents_->TreeHashRootEquals(relative_path, root)) {
      VLOG(1) << "content mismatch for " << relative_path.AsUTF8Unsafe();
      hash_mismatch_paths_.insert(relative_path);
      continue;
    }

    writer.AddHashes(relative_path, block_size_, hashes);
  }
  bool result = writer.WriteToFile(hashes_file);
  UMA_HISTOGRAM_TIMES("ExtensionContentHashFetcher.CreateHashesTime",
                      timer.Elapsed());
  return result;
}

void ContentHashFetcherJob::DispatchCallback() {
  {
    base::AutoLock autolock(cancelled_lock_);
    if (cancelled_)
      return;
  }
  callback_.Run(this);
}

// ----

ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
                                       ContentVerifierDelegate* delegate,
                                       const FetchCallback& callback)
    : context_(context),
      delegate_(delegate),
      fetch_callback_(callback),
      weak_ptr_factory_(this) {
}

ContentHashFetcher::~ContentHashFetcher() {
  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    i->second->Cancel();
  }
}

void ContentHashFetcher::DoFetch(const Extension* extension, bool force) {
  DCHECK(extension);

  IdAndVersion key(extension->id(), extension->version()->GetString());
  JobMap::iterator found = jobs_.find(key);
  if (found != jobs_.end()) {
    if (!force || found->second->force()) {
      // Just let the existing job keep running.
      return;
    } else {
      // Kill the existing non-force job, so we can start a new one below.
      found->second->Cancel();
      jobs_.erase(found);
    }
  }

  // TODO(asargent) - we should do something here to remember recent attempts
  // to fetch signatures by extension id, and use exponential backoff to avoid
  // hammering the server when we aren't successful in getting them.
  // crbug.com/373397

  DCHECK(extension->version());
  GURL url =
      delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
  ContentHashFetcherJob* job =
      new ContentHashFetcherJob(context_->GetRequestContext(),
                                delegate_->PublicKey(),
                                extension->id(),
                                extension->path(),
                                url,
                                force,
                                base::Bind(&ContentHashFetcher::JobFinished,
                                           weak_ptr_factory_.GetWeakPtr()));
  jobs_.insert(std::make_pair(key, job));
  job->Start();
}

void ContentHashFetcher::ExtensionLoaded(const Extension* extension) {
  CHECK(extension);
  DoFetch(extension, false);
}

void ContentHashFetcher::ExtensionUnloaded(const Extension* extension) {
  CHECK(extension);
  IdAndVersion key(extension->id(), extension->version()->GetString());
  JobMap::iterator found = jobs_.find(key);
  if (found != jobs_.end()) {
    found->second->Cancel();
    jobs_.erase(found);
  }
}

void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
  if (!job->IsCancelled()) {
    fetch_callback_.Run(job->extension_id(),
                        job->success(),
                        job->force(),
                        job->hash_mismatch_paths());
  }

  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    if (i->second.get() == job) {
      jobs_.erase(i);
      break;
    }
  }
}

}  // namespace extensions