Upstream version 7.36.149.0
platform/framework/web/crosswalk.git: src/extensions/browser/content_hash_fetcher.cc
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "extensions/browser/content_hash_fetcher.h"

#include <algorithm>
#include <set>
#include <string>
#include <vector>

#include "base/base64.h"
#include "base/file_util.h"
#include "base/files/file_enumerator.h"
#include "base/json/json_reader.h"
#include "base/memory/ref_counted.h"
#include "base/stl_util.h"
#include "base/synchronization/lock.h"
#include "base/task_runner_util.h"
#include "base/version.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/browser_thread.h"
#include "crypto/secure_hash.h"
#include "crypto/sha2.h"
#include "extensions/browser/computed_hashes.h"
#include "extensions/browser/extension_registry.h"
#include "extensions/common/constants.h"
#include "extensions/common/extension.h"
#include "extensions/common/file_util.h"
#include "net/base/load_flags.h"
#include "net/url_request/url_fetcher.h"
#include "net/url_request/url_fetcher_delegate.h"
#include "net/url_request/url_request_status.h"

namespace {

typedef std::set<base::FilePath> SortedFilePathSet;

}  // namespace

namespace extensions {

// This class takes care of the disk and network I/O needed to ensure we have
// both a verified_contents.json file from the webstore and a
// computed_hashes.json file computed over the files in an extension's
// directory.
class ContentHashFetcherJob
    : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
      public net::URLFetcherDelegate {
 public:
  typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
  ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
                        const std::string& extension_id,
                        const base::FilePath& extension_path,
                        const GURL& fetch_url,
                        const CompletionCallback& callback);

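  // Begins the job: checks on the blocking pool whether a
  // verified_contents.json file already exists for this extension, then
  // continues from DoneCheckingForVerifiedContents().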
  void Start();

  // Cancels this job, which will attempt to stop I/O operations sooner than
  // just waiting for the entire job to complete. Safe to call from any thread.
  void Cancel();

  // Returns whether this job was completely successful (we have both verified
  // contents and computed hashes).
  bool success() { return success_; }

  // Do we have a verified_contents.json file?
  bool have_verified_contents() { return have_verified_contents_; }

 private:
  friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
  virtual ~ContentHashFetcherJob();

  // Checks whether this job has been cancelled. Safe to call from any thread.
  bool IsCancelled();

  // Callback for when we're done doing file I/O to see if we already have
  // a verified contents file. If we don't, this will kick off a network
  // request to get one.
  void DoneCheckingForVerifiedContents(bool found);

  // URLFetcherDelegate interface
  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;

  // Callback for when we're done ensuring we have verified contents, and are
  // ready to move on to MaybeCreateHashes.
  void DoneFetchingVerifiedContents(bool success);

  // Callback for the job to write the verified contents to the filesystem.
  void OnVerifiedContentsWritten(size_t expected_size, int write_result);

  // The verified contents file from the webstore only contains the treehash
  // root hash, but for performance we want to cache the individual block level
  // hashes. This function will create that cache with block-level hashes for
  // each file in the extension if needed (the treehash root hash for each of
  // these should equal what is in the verified contents file from the
  // webstore).
  void MaybeCreateHashes();

  // Computes hashes for all files in |extension_path_|, and uses a
  // ComputedHashes::Writer to write that information into |hashes_file|.
  // Returns true on success.
  bool CreateHashes(const base::FilePath& hashes_file);

  // Will call the callback, if we haven't been cancelled.
  void DispatchCallback();

  net::URLRequestContextGetter* request_context_;
  std::string extension_id_;
  base::FilePath extension_path_;

  // The url we'll need to use to fetch a verified_contents.json file.
  GURL fetch_url_;

  CompletionCallback callback_;
  content::BrowserThread::ID creation_thread_;

  // Used for fetching content signatures.
  scoped_ptr<net::URLFetcher> url_fetcher_;

  // Whether this job succeeded.
  bool success_;

  // Whether we either found a verified contents file, or were successful in
  // fetching one and saving it to disk.
  bool have_verified_contents_;

  // The block size to use for hashing.
  int block_size_;

  // Note: this may be accessed from multiple threads, so all access should
  // be protected by |cancelled_lock_|.
  bool cancelled_;

  // A lock for synchronizing access to |cancelled_|.
  base::Lock cancelled_lock_;
};

ContentHashFetcherJob::ContentHashFetcherJob(
    net::URLRequestContextGetter* request_context,
    const std::string& extension_id,
    const base::FilePath& extension_path,
    const GURL& fetch_url,
    const CompletionCallback& callback)
    : request_context_(request_context),
      extension_id_(extension_id),
      extension_path_(extension_path),
      fetch_url_(fetch_url),
      callback_(callback),
      success_(false),
      have_verified_contents_(false),
      // TODO(asargent) - use the value from verified_contents.json for each
      // file, instead of using a constant.
      block_size_(4096),
      cancelled_(false) {
  bool got_id =
      content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
  DCHECK(got_id);
}

void ContentHashFetcherJob::Start() {
  base::FilePath verified_contents_path =
      file_util::GetVerifiedContentsPath(extension_path_);
  base::PostTaskAndReplyWithResult(
      content::BrowserThread::GetBlockingPool(),
      FROM_HERE,
      base::Bind(&base::PathExists, verified_contents_path),
      base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
                 this));
}

void ContentHashFetcherJob::Cancel() {
  base::AutoLock autolock(cancelled_lock_);
  cancelled_ = true;
}

ContentHashFetcherJob::~ContentHashFetcherJob() {
}

bool ContentHashFetcherJob::IsCancelled() {
  base::AutoLock autolock(cancelled_lock_);
  return cancelled_;
}

void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
  if (IsCancelled())
    return;
  if (found) {
    DoneFetchingVerifiedContents(true);
  } else {
    url_fetcher_.reset(
        net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this));
    url_fetcher_->SetRequestContext(request_context_);
    url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
                               net::LOAD_DO_NOT_SAVE_COOKIES |
                               net::LOAD_DISABLE_CACHE);
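    // If the fetch fails because the network changed, retry automatically a
    // few times before giving up.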
    url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
    url_fetcher_->Start();
  }
}

// Helper function to let us pass ownership of a string via base::Bind with the
// contents to be written into a file. Also ensures that the directory for
// |path| exists, creating it if needed. Returns the number of bytes written,
// or -1 on failure, so the caller can compare the result to the expected size.
// (Note: the previous version collapsed the byte count to a 0/1 bool, which
// made that comparison fail for any payload larger than one byte.)
static int WriteFileHelper(const base::FilePath& path,
                           scoped_ptr<std::string> content) {
  base::FilePath dir = path.DirName();
  if (!base::CreateDirectoryAndGetError(dir, NULL))
    return -1;
  return base::WriteFile(path, content->data(), content->size());
}

void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
  if (IsCancelled())
    return;
  scoped_ptr<std::string> response(new std::string);
  if (!url_fetcher_->GetStatus().is_success() ||
      !url_fetcher_->GetResponseAsString(response.get())) {
    DoneFetchingVerifiedContents(false);
    return;
  }

  // Parse the response to make sure it is valid JSON (on staging it can
  // sometimes be a login-redirect HTML page, an XML file, etc. if you aren't
  // logged in with the right cookies).
  // TODO(asargent) - It would be a nice enhancement to move to parsing this
  // in a sandboxed helper (crbug.com/372878).
  scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
  if (parsed) {
    parsed.reset();  // No longer needed.
    base::FilePath destination =
        file_util::GetVerifiedContentsPath(extension_path_);
    size_t size = response->size();
    base::PostTaskAndReplyWithResult(
        content::BrowserThread::GetBlockingPool(),
        FROM_HERE,
        base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
        base::Bind(
            &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
  } else {
    DoneFetchingVerifiedContents(false);
  }
}

void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
                                                      int write_result) {
  bool success =
      (write_result >= 0 && static_cast<size_t>(write_result) == expected_size);
  DoneFetchingVerifiedContents(success);
}

void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
  have_verified_contents_ = success;

  if (IsCancelled())
    return;

  // TODO(asargent) - eventually we should abort here on !success, but for
  // testing purposes it's actually still helpful to continue on to create the
  // computed hashes.

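  // Computing hashes requires blocking file I/O, so do that work on the
  // blocking pool, sequenced so these tasks run one at a time in order.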
  content::BrowserThread::PostBlockingPoolSequencedTask(
      "ContentHashFetcher",
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
}

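// Runs on the blocking pool; posts DispatchCallback back to the thread this
// job was created on when finished.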
void ContentHashFetcherJob::MaybeCreateHashes() {
  if (IsCancelled())
    return;
  base::FilePath hashes_file =
      file_util::GetComputedHashesPath(extension_path_);

  if (base::PathExists(hashes_file))
    success_ = true;
  else
    success_ = CreateHashes(hashes_file);

  content::BrowserThread::PostTask(
      creation_thread_,
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
}

bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
  if (IsCancelled())
    return false;
  // Make sure the directory exists.
  if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
    return false;

  base::FileEnumerator enumerator(extension_path_,
                                  true, /* recursive */
                                  base::FileEnumerator::FILES);
  // First discover all the file paths and put them in a sorted set.
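  // (Sorting makes the resulting computed_hashes.json independent of the
  // platform's directory enumeration order.)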
  SortedFilePathSet paths;
  for (;;) {
    if (IsCancelled())
      return false;

    base::FilePath full_path = enumerator.Next();
    if (full_path.empty())
      break;
    paths.insert(full_path);
  }

  // Now iterate over all the paths in sorted order and compute the block
  // hashes for each one.
  ComputedHashes::Writer writer;
  for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
    if (IsCancelled())
      return false;
    const base::FilePath& full_path = *i;
    base::FilePath relative_path;
    extension_path_.AppendRelativePath(full_path, &relative_path);
    std::string contents;
    if (!base::ReadFileToString(full_path, &contents)) {
      LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
      continue;
    }

    // Hash the file in blocks of |block_size_| bytes; the final block may be
    // shorter.
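    // For example, with block_size_ == 4096 a 10000-byte file produces three
    // hashes: two over full 4096-byte blocks and one over the final 1808
    // bytes.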
    std::vector<std::string> hashes;
    size_t offset = 0;
    while (offset < contents.size()) {
      if (IsCancelled())
        return false;
      const char* block_start = contents.data() + offset;
      size_t bytes_to_read =
          std::min(contents.size() - offset, static_cast<size_t>(block_size_));
      DCHECK_GT(bytes_to_read, 0u);
      scoped_ptr<crypto::SecureHash> hash(
          crypto::SecureHash::Create(crypto::SecureHash::SHA256));
      hash->Update(block_start, bytes_to_read);

      hashes.push_back(std::string());
      std::string* buffer = &hashes.back();
      buffer->resize(crypto::kSHA256Length);
      hash->Finish(string_as_array(buffer), buffer->size());

      // Get ready for the next iteration.
      offset += bytes_to_read;
    }
    writer.AddHashes(relative_path, block_size_, hashes);
  }
  return writer.WriteToFile(hashes_file);
}

void ContentHashFetcherJob::DispatchCallback() {
  {
    base::AutoLock autolock(cancelled_lock_);
    if (cancelled_)
      return;
  }
  callback_.Run(this);
}

// ----

ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
                                       ContentVerifierDelegate* delegate)
    : context_(context),
      delegate_(delegate),
      observer_(this),
      weak_ptr_factory_(this) {
}

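// Cancel any jobs that are still in flight so they stop their I/O work as
// soon as possible.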
ContentHashFetcher::~ContentHashFetcher() {
  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    i->second->Cancel();
  }
}

void ContentHashFetcher::Start() {
  ExtensionRegistry* registry = ExtensionRegistry::Get(context_);
  observer_.Add(registry);
}

void ContentHashFetcher::DoFetch(const Extension* extension) {
  if (!extension || !delegate_->ShouldBeVerified(*extension))
    return;

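  // Jobs are keyed by (extension id, version), so at most one fetch is in
  // flight at a time for a given version of an extension.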
  DCHECK(extension->version());
  IdAndVersion key(extension->id(), extension->version()->GetString());
  if (ContainsKey(jobs_, key))
    return;

  // TODO(asargent) - we should do something here to remember recent attempts
  // to fetch signatures by extension id, and use exponential backoff to avoid
  // hammering the server when we aren't successful in getting them.
  // crbug.com/373397

  GURL url =
      delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
  ContentHashFetcherJob* job =
      new ContentHashFetcherJob(context_->GetRequestContext(),
                                extension->id(),
                                extension->path(),
                                url,
                                base::Bind(&ContentHashFetcher::JobFinished,
                                           weak_ptr_factory_.GetWeakPtr()));
  jobs_.insert(std::make_pair(key, job));
  job->Start();
}

void ContentHashFetcher::OnExtensionLoaded(
    content::BrowserContext* browser_context,
    const Extension* extension) {
  CHECK(extension);
  DoFetch(extension);
}

void ContentHashFetcher::OnExtensionUnloaded(
    content::BrowserContext* browser_context,
    const Extension* extension,
    UnloadedExtensionInfo::Reason reason) {
  CHECK(extension);
  IdAndVersion key(extension->id(), extension->version()->GetString());
  JobMap::iterator found = jobs_.find(key);
  if (found != jobs_.end()) {
    // Cancel the job so it stops doing I/O for an extension that is no longer
    // loaded before we drop our reference to it.
    found->second->Cancel();
    jobs_.erase(found);
  }
}

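// Called on the creation thread when a job completes; removes our reference
// to the job from |jobs_|.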
void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    if (i->second.get() == job) {
      jobs_.erase(i);
      break;
    }
  }
}

}  // namespace extensions