Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / content / browser / download / save_package.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/download/save_package.h"
6
7 #include <algorithm>
8
9 #include "base/bind.h"
10 #include "base/files/file_path.h"
11 #include "base/files/file_util.h"
12 #include "base/i18n/file_util_icu.h"
13 #include "base/logging.h"
14 #include "base/message_loop/message_loop.h"
15 #include "base/stl_util.h"
16 #include "base/strings/string_piece.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/sys_string_conversions.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "base/threading/thread.h"
21 #include "content/browser/download/download_item_impl.h"
22 #include "content/browser/download/download_manager_impl.h"
23 #include "content/browser/download/download_stats.h"
24 #include "content/browser/download/save_file.h"
25 #include "content/browser/download/save_file_manager.h"
26 #include "content/browser/download/save_item.h"
27 #include "content/browser/loader/resource_dispatcher_host_impl.h"
28 #include "content/browser/renderer_host/render_process_host_impl.h"
29 #include "content/browser/renderer_host/render_view_host_delegate.h"
30 #include "content/browser/renderer_host/render_view_host_impl.h"
31 #include "content/common/view_messages.h"
32 #include "content/public/browser/browser_context.h"
33 #include "content/public/browser/browser_thread.h"
34 #include "content/public/browser/content_browser_client.h"
35 #include "content/public/browser/download_manager_delegate.h"
36 #include "content/public/browser/navigation_entry.h"
37 #include "content/public/browser/notification_service.h"
38 #include "content/public/browser/notification_types.h"
39 #include "content/public/browser/resource_context.h"
40 #include "content/public/browser/web_contents.h"
41 #include "net/base/filename_util.h"
42 #include "net/base/io_buffer.h"
43 #include "net/base/mime_util.h"
44 #include "net/url_request/url_request_context.h"
45 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
46 #include "url/url_constants.h"
47
48 using base::Time;
49 using blink::WebPageSerializerClient;
50
51 namespace content {
52 namespace {
53
// A counter for uniquely identifying each save package. Each SavePackage
// constructor in this file takes the current value and then increments it.
int g_save_package_id = 0;

// Default name which will be used when we can not get a proper name from the
// resource URL.
const char kDefaultSaveName[] = "saved_resource";

// Upper bound for the ordinal number appended to a file name when resolving a
// conflict between files which share the same base file name. Ordinals that
// are actually used are strictly smaller than this value.
const int32 kMaxFileOrdinalNumber = 9999;

// Maximum length for a file path. Since Windows has the MAX_PATH limitation
// for file paths, we need to make sure the length of the path of every saved
// file is less than MAX_PATH. On POSIX the equivalent limit is PATH_MAX.
#if defined(OS_WIN)
const uint32 kMaxFilePathLength = MAX_PATH - 1;
#elif defined(OS_POSIX)
const uint32 kMaxFilePathLength = PATH_MAX - 1;
#endif

// Maximum length for the file ordinal number part. Since ordinals stay below
// kMaxFileOrdinalNumber (9999), the longest ordinal part is "(9998)", which is
// 6 characters.
const uint32 kMaxFileOrdinalNumberPartLength = 6;
78
79 // Strip current ordinal number, if any. Should only be used on pure
80 // file names, i.e. those stripped of their extensions.
81 // TODO(estade): improve this to not choke on alternate encodings.
82 base::FilePath::StringType StripOrdinalNumber(
83     const base::FilePath::StringType& pure_file_name) {
84   base::FilePath::StringType::size_type r_paren_index =
85       pure_file_name.rfind(FILE_PATH_LITERAL(')'));
86   base::FilePath::StringType::size_type l_paren_index =
87       pure_file_name.rfind(FILE_PATH_LITERAL('('));
88   if (l_paren_index >= r_paren_index)
89     return pure_file_name;
90
91   for (base::FilePath::StringType::size_type i = l_paren_index + 1;
92        i != r_paren_index; ++i) {
93     if (!IsAsciiDigit(pure_file_name[i]))
94       return pure_file_name;
95   }
96
97   return pure_file_name.substr(0, l_paren_index);
98 }
99
// Tells whether contents with the given MIME type can be saved as
// complete-HTML. Only "text/html" and "application/xhtml+xml" qualify; the
// comparison is exact (case-sensitive, no parameters).
bool CanSaveAsComplete(const std::string& contents_mime_type) {
  if (contents_mime_type == "text/html")
    return true;
  return contents_mime_type == "application/xhtml+xml";
}
107
108 // Request handle for SavePackage downloads. Currently doesn't support
109 // pause/resume/cancel, but returns a WebContents.
110 class SavePackageRequestHandle : public DownloadRequestHandleInterface {
111  public:
112   SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)
113       : save_package_(save_package) {}
114
115   // DownloadRequestHandleInterface
116   WebContents* GetWebContents() const override {
117     return save_package_.get() ? save_package_->web_contents() : NULL;
118   }
119   DownloadManager* GetDownloadManager() const override { return NULL; }
120   void PauseRequest() const override {}
121   void ResumeRequest() const override {}
122   void CancelRequest() const override {}
123   std::string DebugString() const override {
124     return "SavePackage DownloadRequestHandle";
125   }
126
127  private:
128   base::WeakPtr<SavePackage> save_package_;
129 };
130
131 }  // namespace
132
// Extension (without the leading dot) that GenerateFileName() appends to
// resources which must be saved as HTML.
const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
    FILE_PATH_LITERAL("html");
135
136 SavePackage::SavePackage(WebContents* web_contents,
137                          SavePageType save_type,
138                          const base::FilePath& file_full_path,
139                          const base::FilePath& directory_full_path)
140     : WebContentsObserver(web_contents),
141       file_manager_(NULL),
142       download_manager_(NULL),
143       download_(NULL),
144       page_url_(GetUrlToBeSaved()),
145       saved_main_file_path_(file_full_path),
146       saved_main_directory_path_(directory_full_path),
147       title_(web_contents->GetTitle()),
148       start_tick_(base::TimeTicks::Now()),
149       finished_(false),
150       mhtml_finishing_(false),
151       user_canceled_(false),
152       disk_error_occurred_(false),
153       save_type_(save_type),
154       all_save_items_count_(0),
155       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
156       wait_state_(INITIALIZE),
157       contents_id_(web_contents->GetRenderProcessHost()->GetID()),
158       unique_id_(g_save_package_id++),
159       wrote_to_completed_file_(false),
160       wrote_to_failed_file_(false) {
161   DCHECK(page_url_.is_valid());
162   DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
163          (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
164          (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML));
165   DCHECK(!saved_main_file_path_.empty() &&
166          saved_main_file_path_.value().length() <= kMaxFilePathLength);
167   DCHECK(!saved_main_directory_path_.empty() &&
168          saved_main_directory_path_.value().length() < kMaxFilePathLength);
169   InternalInit();
170 }
171
172 SavePackage::SavePackage(WebContents* web_contents)
173     : WebContentsObserver(web_contents),
174       file_manager_(NULL),
175       download_manager_(NULL),
176       download_(NULL),
177       page_url_(GetUrlToBeSaved()),
178       title_(web_contents->GetTitle()),
179       start_tick_(base::TimeTicks::Now()),
180       finished_(false),
181       mhtml_finishing_(false),
182       user_canceled_(false),
183       disk_error_occurred_(false),
184       save_type_(SAVE_PAGE_TYPE_UNKNOWN),
185       all_save_items_count_(0),
186       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
187       wait_state_(INITIALIZE),
188       contents_id_(web_contents->GetRenderProcessHost()->GetID()),
189       unique_id_(g_save_package_id++),
190       wrote_to_completed_file_(false),
191       wrote_to_failed_file_(false) {
192   DCHECK(page_url_.is_valid());
193   InternalInit();
194 }
195
196 // This is for testing use. Set |finished_| as true because we don't want
197 // method Cancel to be be called in destructor in test mode.
198 // We also don't call InternalInit().
199 SavePackage::SavePackage(WebContents* web_contents,
200                          const base::FilePath& file_full_path,
201                          const base::FilePath& directory_full_path)
202     : WebContentsObserver(web_contents),
203       file_manager_(NULL),
204       download_manager_(NULL),
205       download_(NULL),
206       saved_main_file_path_(file_full_path),
207       saved_main_directory_path_(directory_full_path),
208       start_tick_(base::TimeTicks::Now()),
209       finished_(true),
210       mhtml_finishing_(false),
211       user_canceled_(false),
212       disk_error_occurred_(false),
213       save_type_(SAVE_PAGE_TYPE_UNKNOWN),
214       all_save_items_count_(0),
215       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
216       wait_state_(INITIALIZE),
217       contents_id_(0),
218       unique_id_(g_save_package_id++),
219       wrote_to_completed_file_(false),
220       wrote_to_failed_file_(false) {
221 }
222
223 SavePackage::~SavePackage() {
224   // Stop receiving saving job's updates
225   if (!finished_ && !canceled()) {
226     // Unexpected quit.
227     Cancel(true);
228   }
229
230   // We should no longer be observing the DownloadItem at this point.
231   CHECK(!download_);
232
233   DCHECK(all_save_items_count_ == (waiting_item_queue_.size() +
234                                    completed_count() +
235                                    in_process_count()));
236   // Free all SaveItems.
237   while (!waiting_item_queue_.empty()) {
238     // We still have some items which are waiting for start to save.
239     SaveItem* save_item = waiting_item_queue_.front();
240     waiting_item_queue_.pop();
241     delete save_item;
242   }
243
244   STLDeleteValues(&saved_success_items_);
245   STLDeleteValues(&in_progress_items_);
246   STLDeleteValues(&saved_failed_items_);
247
248   file_manager_ = NULL;
249 }
250
251 GURL SavePackage::GetUrlToBeSaved() {
252   // Instead of using web_contents_.GetURL here, we use url() (which is the
253   // "real" url of the page) from the NavigationEntry because it reflects its
254   // origin rather than the displayed one (returned by GetURL) which may be
255   // different (like having "view-source:" on the front).
256   NavigationEntry* visible_entry =
257       web_contents()->GetController().GetVisibleEntry();
258   return visible_entry->GetURL();
259 }
260
261 void SavePackage::Cancel(bool user_action) {
262   if (!canceled()) {
263     if (user_action)
264       user_canceled_ = true;
265     else
266       disk_error_occurred_ = true;
267     Stop();
268   }
269   RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED);
270 }
271
272 // Init() can be called directly, or indirectly via GetSaveInfo(). In both
273 // cases, we need file_manager_ to be initialized, so we do this first.
274 void SavePackage::InternalInit() {
275   ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get();
276   if (!rdh) {
277     NOTREACHED();
278     return;
279   }
280
281   file_manager_ = rdh->save_file_manager();
282   DCHECK(file_manager_);
283
284   download_manager_ = static_cast<DownloadManagerImpl*>(
285       BrowserContext::GetDownloadManager(
286           web_contents()->GetBrowserContext()));
287   DCHECK(download_manager_);
288
289   RecordSavePackageEvent(SAVE_PACKAGE_STARTED);
290 }
291
292 bool SavePackage::Init(
293     const SavePackageDownloadCreatedCallback& download_created_callback) {
294   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
295   // Set proper running state.
296   if (wait_state_ != INITIALIZE)
297     return false;
298
299   wait_state_ = START_PROCESS;
300
301   // Initialize the request context and resource dispatcher.
302   BrowserContext* browser_context = web_contents()->GetBrowserContext();
303   if (!browser_context) {
304     NOTREACHED();
305     return false;
306   }
307
308   scoped_ptr<DownloadRequestHandleInterface> request_handle(
309       new SavePackageRequestHandle(AsWeakPtr()));
310   // The download manager keeps ownership but adds us as an observer.
311   download_manager_->CreateSavePackageDownloadItem(
312       saved_main_file_path_,
313       page_url_,
314       ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ?
315        "multipart/related" : "text/html"),
316       request_handle.Pass(),
317       base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(),
318                  download_created_callback));
319   return true;
320 }
321
322 void SavePackage::InitWithDownloadItem(
323     const SavePackageDownloadCreatedCallback& download_created_callback,
324     DownloadItemImpl* item) {
325   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
326   DCHECK(item);
327   download_ = item;
328   download_->AddObserver(this);
329   // Confirm above didn't delete the tab out from under us.
330   if (!download_created_callback.is_null())
331     download_created_callback.Run(download_);
332
333   // Check save type and process the save page job.
334   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
335     // Get directory
336     DCHECK(!saved_main_directory_path_.empty());
337     GetAllSavableResourceLinksForCurrentPage();
338   } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
339     web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind(
340         &SavePackage::OnMHTMLGenerated, this));
341   } else {
342     DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_;
343     wait_state_ = NET_FILES;
344     SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ?
345         SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
346         SaveFileCreateInfo::SAVE_FILE_FROM_NET;
347     SaveItem* save_item = new SaveItem(page_url_,
348                                        Referrer(),
349                                        this,
350                                        save_source);
351     // Add this item to waiting list.
352     waiting_item_queue_.push(save_item);
353     all_save_items_count_ = 1;
354     download_->SetTotalBytes(1);
355
356     DoSavingProcess();
357   }
358 }
359
360 void SavePackage::OnMHTMLGenerated(int64 size) {
361   if (size <= 0) {
362     Cancel(false);
363     return;
364   }
365   wrote_to_completed_file_ = true;
366
367   // Hack to avoid touching download_ after user cancel.
368   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
369   // with SavePackage flow.
370   if (download_->GetState() == DownloadItem::IN_PROGRESS) {
371     download_->SetTotalBytes(size);
372     download_->DestinationUpdate(size, 0, std::string());
373     // Must call OnAllDataSaved here in order for
374     // GDataDownloadObserver::ShouldUpload() to return true.
375     // ShouldCompleteDownload() may depend on the gdata uploader to finish.
376     download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
377   }
378
379   if (!download_manager_->GetDelegate()) {
380     Finish();
381     return;
382   }
383
384   if (download_manager_->GetDelegate()->ShouldCompleteDownload(
385           download_, base::Bind(&SavePackage::Finish, this))) {
386     Finish();
387   }
388 }
389
390 // On POSIX, the length of |pure_file_name| + |file_name_ext| is further
391 // restricted by NAME_MAX. The maximum allowed path looks like:
392 // '/path/to/save_dir' + '/' + NAME_MAX.
393 uint32 SavePackage::GetMaxPathLengthForDirectory(
394     const base::FilePath& base_dir) {
395 #if defined(OS_POSIX)
396   return std::min(kMaxFilePathLength,
397                   static_cast<uint32>(base_dir.value().length()) +
398                   NAME_MAX + 1);
399 #else
400   return kMaxFilePathLength;
401 #endif
402 }
403
404 // File name is considered being consist of pure file name, dot and file
405 // extension name. File name might has no dot and file extension, or has
406 // multiple dot inside file name. The dot, which separates the pure file
407 // name and file extension name, is last dot in the whole file name.
408 // This function is for making sure the length of specified file path is not
409 // great than the specified maximum length of file path and getting safe pure
410 // file name part if the input pure file name is too long.
411 // The parameter |dir_path| specifies directory part of the specified
412 // file path. The parameter |file_name_ext| specifies file extension
413 // name part of the specified file path (including start dot). The parameter
414 // |max_file_path_len| specifies maximum length of the specified file path.
415 // The parameter |pure_file_name| input pure file name part of the specified
416 // file path. If the length of specified file path is great than
417 // |max_file_path_len|, the |pure_file_name| will output new pure file name
418 // part for making sure the length of specified file path is less than
419 // specified maximum length of file path. Return false if the function can
420 // not get a safe pure file name, otherwise it returns true.
421 bool SavePackage::GetSafePureFileName(
422     const base::FilePath& dir_path,
423     const base::FilePath::StringType& file_name_ext,
424     uint32 max_file_path_len,
425     base::FilePath::StringType* pure_file_name) {
426   DCHECK(!pure_file_name->empty());
427   int available_length = static_cast<int>(max_file_path_len -
428                                           dir_path.value().length() -
429                                           file_name_ext.length());
430   // Need an extra space for the separator.
431   if (!dir_path.EndsWithSeparator())
432     --available_length;
433
434   // Plenty of room.
435   if (static_cast<int>(pure_file_name->length()) <= available_length)
436     return true;
437
438   // Limited room. Truncate |pure_file_name| to fit.
439   if (available_length > 0) {
440     *pure_file_name = pure_file_name->substr(0, available_length);
441     return true;
442   }
443
444   // Not enough room to even use a shortened |pure_file_name|.
445   pure_file_name->clear();
446   return false;
447 }
448
449 // Generate name for saving resource.
450 bool SavePackage::GenerateFileName(const std::string& disposition,
451                                    const GURL& url,
452                                    bool need_html_ext,
453                                    base::FilePath::StringType* generated_name) {
454   // TODO(jungshik): Figure out the referrer charset when having one
455   // makes sense and pass it to GenerateFileName.
456   base::FilePath file_path = net::GenerateFileName(url,
457                                                    disposition,
458                                                    std::string(),
459                                                    std::string(),
460                                                    std::string(),
461                                                    kDefaultSaveName);
462
463   DCHECK(!file_path.empty());
464   base::FilePath::StringType pure_file_name =
465       file_path.RemoveExtension().BaseName().value();
466   base::FilePath::StringType file_name_ext = file_path.Extension();
467
468   // If it is HTML resource, use ".html" as its extension.
469   if (need_html_ext) {
470     file_name_ext = FILE_PATH_LITERAL(".");
471     file_name_ext.append(kDefaultHtmlExtension);
472   }
473
474   // Need to make sure the suggested file name is not too long.
475   uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);
476
477   // Get safe pure file name.
478   if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
479                            max_path, &pure_file_name))
480     return false;
481
482   base::FilePath::StringType file_name = pure_file_name + file_name_ext;
483
484   // Check whether we already have same name in a case insensitive manner.
485   FileNameSet::const_iterator iter = file_name_set_.find(file_name);
486   if (iter == file_name_set_.end()) {
487     file_name_set_.insert(file_name);
488   } else {
489     // Found same name, increase the ordinal number for the file name.
490     pure_file_name =
491         base::FilePath(*iter).RemoveExtension().BaseName().value();
492     base::FilePath::StringType base_file_name =
493         StripOrdinalNumber(pure_file_name);
494
495     // We need to make sure the length of base file name plus maximum ordinal
496     // number path will be less than or equal to kMaxFilePathLength.
497     if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
498         max_path - kMaxFileOrdinalNumberPartLength, &base_file_name))
499       return false;
500
501     // Prepare the new ordinal number.
502     uint32 ordinal_number;
503     FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name);
504     if (it == file_name_count_map_.end()) {
505       // First base-name-conflict resolving, use 1 as initial ordinal number.
506       file_name_count_map_[base_file_name] = 1;
507       ordinal_number = 1;
508     } else {
509       // We have met same base-name conflict, use latest ordinal number.
510       ordinal_number = it->second;
511     }
512
513     if (ordinal_number > (kMaxFileOrdinalNumber - 1)) {
514       // Use a random file from temporary file.
515       base::FilePath temp_file;
516       base::CreateTemporaryFile(&temp_file);
517       file_name = temp_file.RemoveExtension().BaseName().value();
518       // Get safe pure file name.
519       if (!GetSafePureFileName(saved_main_directory_path_,
520                                base::FilePath::StringType(),
521                                max_path, &file_name))
522         return false;
523     } else {
524       for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) {
525         base::FilePath::StringType new_name = base_file_name +
526             base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext;
527         if (file_name_set_.find(new_name) == file_name_set_.end()) {
528           // Resolved name conflict.
529           file_name = new_name;
530           file_name_count_map_[base_file_name] = ++i;
531           break;
532         }
533       }
534     }
535
536     file_name_set_.insert(file_name);
537   }
538
539   DCHECK(!file_name.empty());
540   generated_name->assign(file_name);
541
542   return true;
543 }
544
545 // We have received a message from SaveFileManager about a new saving job. We
546 // create a SaveItem and store it in our in_progress list.
547 void SavePackage::StartSave(const SaveFileCreateInfo* info) {
548   DCHECK(info && !info->url.is_empty());
549
550   SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec());
551   if (it == in_progress_items_.end()) {
552     // If not found, we must have cancel action.
553     DCHECK(canceled());
554     return;
555   }
556   SaveItem* save_item = it->second;
557
558   DCHECK(!saved_main_file_path_.empty());
559
560   save_item->SetSaveId(info->save_id);
561   save_item->SetTotalBytes(info->total_bytes);
562
563   // Determine the proper path for a saving job, by choosing either the default
564   // save directory, or prompting the user.
565   DCHECK(!save_item->has_final_name());
566   if (info->url != page_url_) {
567     base::FilePath::StringType generated_name;
568     // For HTML resource file, make sure it will have .htm as extension name,
569     // otherwise, when you open the saved page in Chrome again, download
570     // file manager will treat it as downloadable resource, and download it
571     // instead of opening it as HTML.
572     bool need_html_ext =
573         info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
574     if (!GenerateFileName(info->content_disposition,
575                           GURL(info->url),
576                           need_html_ext,
577                           &generated_name)) {
578       // We can not generate file name for this SaveItem, so we cancel the
579       // saving page job if the save source is from serialized DOM data.
580       // Otherwise, it means this SaveItem is sub-resource type, we treat it
581       // as an error happened on saving. We can ignore this type error for
582       // sub-resource links which will be resolved as absolute links instead
583       // of local links in final saved contents.
584       if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)
585         Cancel(true);
586       else
587         SaveFinished(save_item->save_id(), 0, false);
588       return;
589     }
590
591     // When saving page as only-HTML, we only have a SaveItem whose url
592     // must be page_url_.
593     DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
594     DCHECK(!saved_main_directory_path_.empty());
595
596     // Now we get final name retrieved from GenerateFileName, we will use it
597     // rename the SaveItem.
598     base::FilePath final_name =
599         saved_main_directory_path_.Append(generated_name);
600     save_item->Rename(final_name);
601   } else {
602     // It is the main HTML file, use the name chosen by the user.
603     save_item->Rename(saved_main_file_path_);
604   }
605
606   // If the save source is from file system, inform SaveFileManager to copy
607   // corresponding file to the file path which this SaveItem specifies.
608   if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) {
609     BrowserThread::PostTask(
610         BrowserThread::FILE, FROM_HERE,
611         base::Bind(&SaveFileManager::SaveLocalFile,
612                    file_manager_,
613                    save_item->url(),
614                    save_item->save_id(),
615                    contents_id()));
616     return;
617   }
618
619   // Check whether we begin to require serialized HTML data.
620   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
621       wait_state_ == HTML_DATA) {
622     // Inform backend to serialize the all frames' DOM and send serialized
623     // HTML data back.
624     GetSerializedHtmlDataForCurrentPageWithLocalLinks();
625   }
626 }
627
628 SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) {
629   if (in_process_count()) {
630     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
631         it != in_progress_items_.end(); ++it) {
632       SaveItem* save_item = it->second;
633       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
634       if (save_item->save_id() == save_id)
635         return save_item;
636     }
637   }
638   return NULL;
639 }
640
641 void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) {
642   SaveUrlItemMap::iterator it = in_progress_items_.find(
643       save_item->url().spec());
644   DCHECK(it != in_progress_items_.end());
645   DCHECK(save_item == it->second);
646   in_progress_items_.erase(it);
647
648   if (save_item->success()) {
649     // Add it to saved_success_items_.
650     DCHECK(saved_success_items_.find(save_item->save_id()) ==
651            saved_success_items_.end());
652     saved_success_items_[save_item->save_id()] = save_item;
653   } else {
654     // Add it to saved_failed_items_.
655     DCHECK(saved_failed_items_.find(save_item->url().spec()) ==
656            saved_failed_items_.end());
657     saved_failed_items_[save_item->url().spec()] = save_item;
658   }
659 }
660
661 // Called for updating saving state.
662 bool SavePackage::UpdateSaveProgress(int32 save_id,
663                                      int64 size,
664                                      bool write_success) {
665   // Because we might have canceled this saving job before,
666   // so we might not find corresponding SaveItem.
667   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
668   if (!save_item)
669     return false;
670
671   save_item->Update(size);
672
673   // If we got disk error, cancel whole save page job.
674   if (!write_success) {
675     // Cancel job with reason of disk error.
676     Cancel(false);
677   }
678   return true;
679 }
680
681 // Stop all page saving jobs that are in progress and instruct the file thread
682 // to delete all saved  files.
683 void SavePackage::Stop() {
684   // If we haven't moved out of the initial state, there's nothing to cancel and
685   // there won't be valid pointers for file_manager_ or download_.
686   if (wait_state_ == INITIALIZE)
687     return;
688
689   // When stopping, if it still has some items in in_progress, cancel them.
690   DCHECK(canceled());
691   if (in_process_count()) {
692     SaveUrlItemMap::iterator it = in_progress_items_.begin();
693     for (; it != in_progress_items_.end(); ++it) {
694       SaveItem* save_item = it->second;
695       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
696       save_item->Cancel();
697     }
698     // Remove all in progress item to saved map. For failed items, they will
699     // be put into saved_failed_items_, for successful item, they will be put
700     // into saved_success_items_.
701     while (in_process_count())
702       PutInProgressItemToSavedMap(in_progress_items_.begin()->second);
703   }
704
705   // This vector contains the save ids of the save files which SaveFileManager
706   // needs to remove from its save_file_map_.
707   SaveIDList save_ids;
708   for (SavedItemMap::iterator it = saved_success_items_.begin();
709       it != saved_success_items_.end(); ++it)
710     save_ids.push_back(it->first);
711   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
712       it != saved_failed_items_.end(); ++it)
713     save_ids.push_back(it->second->save_id());
714
715   BrowserThread::PostTask(
716       BrowserThread::FILE, FROM_HERE,
717       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
718                  file_manager_,
719                  save_ids));
720
721   finished_ = true;
722   wait_state_ = FAILED;
723
724   // Inform the DownloadItem we have canceled whole save page job.
725   if (download_) {
726     download_->Cancel(false);
727     FinalizeDownloadEntry();
728   }
729 }
730
731 void SavePackage::CheckFinish() {
732   if (in_process_count() || finished_)
733     return;
734
735   base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
736                         saved_success_items_.size() > 1) ?
737                         saved_main_directory_path_ : base::FilePath();
738
739   // This vector contains the final names of all the successfully saved files
740   // along with their save ids. It will be passed to SaveFileManager to do the
741   // renaming job.
742   FinalNameList final_names;
743   for (SavedItemMap::iterator it = saved_success_items_.begin();
744       it != saved_success_items_.end(); ++it)
745     final_names.push_back(std::make_pair(it->first,
746                                          it->second->full_path()));
747
748   BrowserThread::PostTask(
749       BrowserThread::FILE, FROM_HERE,
750       base::Bind(&SaveFileManager::RenameAllFiles,
751                  file_manager_,
752                  final_names,
753                  dir,
754                  web_contents()->GetRenderProcessHost()->GetID(),
755                  web_contents()->GetRenderViewHost()->GetRoutingID(),
756                  id()));
757 }
758
759 // Successfully finished all items of this SavePackage.
760 void SavePackage::Finish() {
761   // User may cancel the job when we're moving files to the final directory.
762   if (canceled())
763     return;
764
765   wait_state_ = SUCCESSFUL;
766   finished_ = true;
767
768   // Record finish.
769   RecordSavePackageEvent(SAVE_PACKAGE_FINISHED);
770
771   // Record any errors that occurred.
772   if (wrote_to_completed_file_) {
773     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED);
774   }
775
776   if (wrote_to_failed_file_) {
777     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED);
778   }
779
780   // This vector contains the save ids of the save files which SaveFileManager
781   // needs to remove from its save_file_map_.
782   SaveIDList save_ids;
783   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
784        it != saved_failed_items_.end(); ++it)
785     save_ids.push_back(it->second->save_id());
786
787   BrowserThread::PostTask(
788       BrowserThread::FILE, FROM_HERE,
789       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
790                  file_manager_,
791                  save_ids));
792
793   if (download_) {
794     // Hack to avoid touching download_ after user cancel.
795     // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
796     // with SavePackage flow.
797     if (download_->GetState() == DownloadItem::IN_PROGRESS) {
798       if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) {
799         download_->DestinationUpdate(
800             all_save_items_count_, CurrentSpeed(), std::string());
801         download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
802       }
803       download_->MarkAsComplete();
804     }
805     FinalizeDownloadEntry();
806   }
807 }
808
809 // Called for updating end state.
810 void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) {
811   // Because we might have canceled this saving job before,
812   // so we might not find corresponding SaveItem. Just ignore it.
813   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
814   if (!save_item)
815     return;
816
817   // Let SaveItem set end state.
818   save_item->Finish(size, is_success);
819   // Remove the associated save id and SavePackage.
820   file_manager_->RemoveSaveFile(save_id, save_item->url(), this);
821
822   PutInProgressItemToSavedMap(save_item);
823
824   // Inform the DownloadItem to update UI.
825   // We use the received bytes as number of saved files.
826   // Hack to avoid touching download_ after user cancel.
827   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
828   // with SavePackage flow.
829   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
830     download_->DestinationUpdate(
831         completed_count(), CurrentSpeed(), std::string());
832   }
833
834   if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM &&
835       save_item->url() == page_url_ && !save_item->received_bytes()) {
836     // If size of main HTML page is 0, treat it as disk error.
837     Cancel(false);
838     return;
839   }
840
841   if (canceled()) {
842     DCHECK(finished_);
843     return;
844   }
845
846   // Continue processing the save page job.
847   DoSavingProcess();
848
849   // Check whether we can successfully finish whole job.
850   CheckFinish();
851 }
852
853 // Sometimes, the net io will only call SaveFileManager::SaveFinished with
854 // save id -1 when it encounters error. Since in this case, save id will be
855 // -1, so we can only use URL to find which SaveItem is associated with
856 // this error.
857 // Saving an item failed. If it's a sub-resource, ignore it. If the error comes
858 // from serializing HTML data, then cancel saving page.
859 void SavePackage::SaveFailed(const GURL& save_url) {
860   SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec());
861   if (it == in_progress_items_.end()) {
862     NOTREACHED();  // Should not exist!
863     return;
864   }
865   SaveItem* save_item = it->second;
866
867   save_item->Finish(0, false);
868
869   PutInProgressItemToSavedMap(save_item);
870
871   // Inform the DownloadItem to update UI.
872   // We use the received bytes as number of saved files.
873   // Hack to avoid touching download_ after user cancel.
874   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
875   // with SavePackage flow.
876   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
877     download_->DestinationUpdate(
878         completed_count(), CurrentSpeed(), std::string());
879   }
880
881   if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
882       (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
883       (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) {
884     // We got error when saving page. Treat it as disk error.
885     Cancel(true);
886   }
887
888   if (canceled()) {
889     DCHECK(finished_);
890     return;
891   }
892
893   // Continue processing the save page job.
894   DoSavingProcess();
895
896   CheckFinish();
897 }
898
899 void SavePackage::SaveCanceled(SaveItem* save_item) {
900   // Call the RemoveSaveFile in UI thread.
901   file_manager_->RemoveSaveFile(save_item->save_id(),
902                                 save_item->url(),
903                                 this);
904   if (save_item->save_id() != -1)
905     BrowserThread::PostTask(
906         BrowserThread::FILE, FROM_HERE,
907         base::Bind(&SaveFileManager::CancelSave,
908                    file_manager_,
909                    save_item->save_id()));
910 }
911
912 // Initiate a saving job of a specific URL. We send the request to
913 // SaveFileManager, which will dispatch it to different approach according to
914 // the save source. Parameter process_all_remaining_items indicates whether
915 // we need to save all remaining items.
916 void SavePackage::SaveNextFile(bool process_all_remaining_items) {
917   DCHECK(web_contents());
918   DCHECK(waiting_item_queue_.size());
919
920   do {
921     // Pop SaveItem from waiting list.
922     SaveItem* save_item = waiting_item_queue_.front();
923     waiting_item_queue_.pop();
924
925     // Add the item to in_progress_items_.
926     SaveUrlItemMap::iterator it = in_progress_items_.find(
927         save_item->url().spec());
928     DCHECK(it == in_progress_items_.end());
929     in_progress_items_[save_item->url().spec()] = save_item;
930     save_item->Start();
931     file_manager_->SaveURL(save_item->url(),
932                            save_item->referrer(),
933                            web_contents()->GetRenderProcessHost()->GetID(),
934                            routing_id(),
935                            save_item->save_source(),
936                            save_item->full_path(),
937                            web_contents()->
938                                GetBrowserContext()->GetResourceContext(),
939                            this);
940   } while (process_all_remaining_items && waiting_item_queue_.size());
941 }
942
943 // Calculate the percentage of whole save page job.
944 int SavePackage::PercentComplete() {
945   if (!all_save_items_count_)
946     return 0;
947   else if (!in_process_count())
948     return 100;
949   else
950     return completed_count() / all_save_items_count_;
951 }
952
953 int64 SavePackage::CurrentSpeed() const {
954   base::TimeDelta diff = base::TimeTicks::Now() - start_tick_;
955   int64 diff_ms = diff.InMilliseconds();
956   return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms;
957 }
958
959 // Continue processing the save page job after one SaveItem has been
960 // finished.
961 void SavePackage::DoSavingProcess() {
962   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
963     // We guarantee that images and JavaScripts must be downloaded first.
964     // So when finishing all those sub-resources, we will know which
965     // sub-resource's link can be replaced with local file path, which
966     // sub-resource's link need to be replaced with absolute URL which
967     // point to its internet address because it got error when saving its data.
968
969     // Start a new SaveItem job if we still have job in waiting queue.
970     if (waiting_item_queue_.size()) {
971       DCHECK(wait_state_ == NET_FILES);
972       SaveItem* save_item = waiting_item_queue_.front();
973       if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) {
974         SaveNextFile(false);
975       } else if (!in_process_count()) {
976         // If there is no in-process SaveItem, it means all sub-resources
977         // have been processed. Now we need to start serializing HTML DOM
978         // for the current page to get the generated HTML data.
979         wait_state_ = HTML_DATA;
980         // All non-HTML resources have been finished, start all remaining
981         // HTML files.
982         SaveNextFile(true);
983       }
984     } else if (in_process_count()) {
985       // Continue asking for HTML data.
986       DCHECK(wait_state_ == HTML_DATA);
987     }
988   } else {
989     // Save as HTML only or MHTML.
990     DCHECK(wait_state_ == NET_FILES);
991     DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
992            (save_type_ == SAVE_PAGE_TYPE_AS_MHTML));
993     if (waiting_item_queue_.size()) {
994       DCHECK(all_save_items_count_ == waiting_item_queue_.size());
995       SaveNextFile(false);
996     }
997   }
998 }
999
1000 bool SavePackage::OnMessageReceived(const IPC::Message& message) {
1001   bool handled = true;
1002   IPC_BEGIN_MESSAGE_MAP(SavePackage, message)
1003     IPC_MESSAGE_HANDLER(ViewHostMsg_SendCurrentPageAllSavableResourceLinks,
1004                         OnReceivedSavableResourceLinksForCurrentPage)
1005     IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData,
1006                         OnReceivedSerializedHtmlData)
1007     IPC_MESSAGE_UNHANDLED(handled = false)
1008   IPC_END_MESSAGE_MAP()
1009   return handled;
1010 }
1011
1012 // After finishing all SaveItems which need to get data from net.
1013 // We collect all URLs which have local storage and send the
1014 // map:(originalURL:currentLocalPath) to render process (backend).
1015 // Then render process will serialize DOM and send data to us.
1016 void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() {
1017   if (wait_state_ != HTML_DATA)
1018     return;
1019   std::vector<GURL> saved_links;
1020   std::vector<base::FilePath> saved_file_paths;
1021   int successful_started_items_count = 0;
1022
1023   // Collect all saved items which have local storage.
1024   // First collect the status of all the resource files and check whether they
1025   // have created local files although they have not been completely saved.
1026   // If yes, the file can be saved. Otherwise, there is a disk error, so we
1027   // need to cancel the page saving job.
1028   for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1029        it != in_progress_items_.end(); ++it) {
1030     DCHECK(it->second->save_source() ==
1031            SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1032     if (it->second->has_final_name())
1033       successful_started_items_count++;
1034     saved_links.push_back(it->second->url());
1035     saved_file_paths.push_back(it->second->file_name());
1036   }
1037
1038   // If not all file of HTML resource have been started, then wait.
1039   if (successful_started_items_count != in_process_count())
1040     return;
1041
1042   // Collect all saved success items.
1043   for (SavedItemMap::iterator it = saved_success_items_.begin();
1044        it != saved_success_items_.end(); ++it) {
1045     DCHECK(it->second->has_final_name());
1046     saved_links.push_back(it->second->url());
1047     saved_file_paths.push_back(it->second->file_name());
1048   }
1049
1050   // Get the relative directory name.
1051   base::FilePath relative_dir_name = saved_main_directory_path_.BaseName();
1052
1053   Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks(
1054       routing_id(), saved_links, saved_file_paths, relative_dir_name));
1055 }
1056
1057 // Process the serialized HTML content data of a specified web page
1058 // retrieved from render process.
1059 void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url,
1060                                                const std::string& data,
1061                                                int32 status) {
1062   WebPageSerializerClient::PageSerializationStatus flag =
1063       static_cast<WebPageSerializerClient::PageSerializationStatus>(status);
1064   // Check current state.
1065   if (wait_state_ != HTML_DATA)
1066     return;
1067
1068   int id = contents_id();
1069   // If the all frames are finished saving, we need to close the
1070   // remaining SaveItems.
1071   if (flag == WebPageSerializerClient::AllFramesAreFinished) {
1072     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1073          it != in_progress_items_.end(); ++it) {
1074       VLOG(20) << " " << __FUNCTION__ << "()"
1075                << " save_id = " << it->second->save_id()
1076                << " url = \"" << it->second->url().spec() << "\"";
1077       BrowserThread::PostTask(
1078           BrowserThread::FILE, FROM_HERE,
1079           base::Bind(&SaveFileManager::SaveFinished,
1080                      file_manager_,
1081                      it->second->save_id(),
1082                      it->second->url(),
1083                      id,
1084                      true));
1085     }
1086     return;
1087   }
1088
1089   SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec());
1090   if (it == in_progress_items_.end()) {
1091     for (SavedItemMap::iterator saved_it = saved_success_items_.begin();
1092       saved_it != saved_success_items_.end(); ++saved_it) {
1093       if (saved_it->second->url() == frame_url) {
1094         wrote_to_completed_file_ = true;
1095         break;
1096       }
1097     }
1098
1099     it = saved_failed_items_.find(frame_url.spec());
1100     if (it != saved_failed_items_.end())
1101       wrote_to_failed_file_ = true;
1102
1103     return;
1104   }
1105
1106   SaveItem* save_item = it->second;
1107   DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1108
1109   if (!data.empty()) {
1110     // Prepare buffer for saving HTML data.
1111     scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size()));
1112     memcpy(new_data->data(), data.data(), data.size());
1113
1114     // Call write file functionality in file thread.
1115     BrowserThread::PostTask(
1116         BrowserThread::FILE, FROM_HERE,
1117         base::Bind(&SaveFileManager::UpdateSaveProgress,
1118                    file_manager_,
1119                    save_item->save_id(),
1120                    new_data,
1121                    static_cast<int>(data.size())));
1122   }
1123
1124   // Current frame is completed saving, call finish in file thread.
1125   if (flag == WebPageSerializerClient::CurrentFrameIsFinished) {
1126     VLOG(20) << " " << __FUNCTION__ << "()"
1127              << " save_id = " << save_item->save_id()
1128              << " url = \"" << save_item->url().spec() << "\"";
1129     BrowserThread::PostTask(
1130         BrowserThread::FILE, FROM_HERE,
1131         base::Bind(&SaveFileManager::SaveFinished,
1132                    file_manager_,
1133                    save_item->save_id(),
1134                    save_item->url(),
1135                    id,
1136                    true));
1137   }
1138 }
1139
1140 // Ask for all savable resource links from backend, include main frame and
1141 // sub-frame.
1142 void SavePackage::GetAllSavableResourceLinksForCurrentPage() {
1143   if (wait_state_ != START_PROCESS)
1144     return;
1145
1146   wait_state_ = RESOURCES_LIST;
1147   Send(new ViewMsg_GetAllSavableResourceLinksForCurrentPage(routing_id(),
1148                                                             page_url_));
1149 }
1150
1151 // Give backend the lists which contain all resource links that have local
1152 // storage, after which, render process will serialize DOM for generating
1153 // HTML data.
1154 void SavePackage::OnReceivedSavableResourceLinksForCurrentPage(
1155     const std::vector<GURL>& resources_list,
1156     const std::vector<Referrer>& referrers_list,
1157     const std::vector<GURL>& frames_list) {
1158   if (wait_state_ != RESOURCES_LIST)
1159     return;
1160
1161   if (resources_list.size() != referrers_list.size())
1162     return;
1163
1164   all_save_items_count_ = static_cast<int>(resources_list.size()) +
1165                            static_cast<int>(frames_list.size());
1166
1167   // We use total bytes as the total number of files we want to save.
1168   // Hack to avoid touching download_ after user cancel.
1169   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
1170   // with SavePackage flow.
1171   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS))
1172     download_->SetTotalBytes(all_save_items_count_);
1173
1174   if (all_save_items_count_) {
1175     // Put all sub-resources to wait list.
1176     for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) {
1177       const GURL& u = resources_list[i];
1178       DCHECK(u.is_valid());
1179       SaveFileCreateInfo::SaveFileSource save_source = u.SchemeIsFile() ?
1180           SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
1181           SaveFileCreateInfo::SAVE_FILE_FROM_NET;
1182       SaveItem* save_item = new SaveItem(u, referrers_list[i],
1183                                          this, save_source);
1184       waiting_item_queue_.push(save_item);
1185     }
1186     // Put all HTML resources to wait list.
1187     for (int i = 0; i < static_cast<int>(frames_list.size()); ++i) {
1188       const GURL& u = frames_list[i];
1189       DCHECK(u.is_valid());
1190       SaveItem* save_item = new SaveItem(
1191           u, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1192       waiting_item_queue_.push(save_item);
1193     }
1194     wait_state_ = NET_FILES;
1195     DoSavingProcess();
1196   } else {
1197     // No resource files need to be saved, treat it as user cancel.
1198     Cancel(true);
1199   }
1200 }
1201
1202 base::FilePath SavePackage::GetSuggestedNameForSaveAs(
1203     bool can_save_as_complete,
1204     const std::string& contents_mime_type,
1205     const std::string& accept_langs) {
1206   base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title_);
1207
1208   // If the page's title matches its URL, use the URL. Try to use the last path
1209   // component or if there is none, the domain as the file name.
1210   // Normally we want to base the filename on the page title, or if it doesn't
1211   // exist, on the URL. It's not easy to tell if the page has no title, because
1212   // if the page has no title, WebContents::GetTitle() will return the page's
1213   // URL (adjusted for display purposes). Therefore, we convert the "title"
1214   // back to a URL, and if it matches the original page URL, we know the page
1215   // had no title (or had a title equal to its URL, which is fine to treat
1216   // similarly).
1217   if (title_ == net::FormatUrl(page_url_, accept_langs)) {
1218     std::string url_path;
1219     if (!page_url_.SchemeIs(url::kDataScheme)) {
1220       std::vector<std::string> url_parts;
1221       base::SplitString(page_url_.path(), '/', &url_parts);
1222       if (!url_parts.empty()) {
1223         for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) {
1224           url_path = url_parts[i];
1225           if (!url_path.empty())
1226             break;
1227         }
1228       }
1229       if (url_path.empty())
1230         url_path = page_url_.host();
1231     } else {
1232       url_path = "dataurl";
1233     }
1234     name_with_proper_ext = base::FilePath::FromUTF8Unsafe(url_path);
1235   }
1236
1237   // Ask user for getting final saving name.
1238   name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext,
1239                                              contents_mime_type);
1240   // Adjust extension for complete types.
1241   if (can_save_as_complete)
1242     name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);
1243
1244   base::FilePath::StringType file_name = name_with_proper_ext.value();
1245   base::i18n::ReplaceIllegalCharactersInPath(&file_name, ' ');
1246   return base::FilePath(file_name);
1247 }
1248
1249 base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) {
1250   // If the file name doesn't have an extension suitable for HTML files,
1251   // append one.
1252   base::FilePath::StringType ext = name.Extension();
1253   if (!ext.empty())
1254     ext.erase(ext.begin());  // Erase preceding '.'.
1255   std::string mime_type;
1256   if (!net::GetMimeTypeFromExtension(ext, &mime_type) ||
1257       !CanSaveAsComplete(mime_type)) {
1258     return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
1259                           kDefaultHtmlExtension);
1260   }
1261   return name;
1262 }
1263
1264 base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name,
1265     const std::string& contents_mime_type) {
1266   // Start extension at 1 to skip over period if non-empty.
1267   base::FilePath::StringType ext = name.Extension().length() ?
1268       name.Extension().substr(1) : name.Extension();
1269   base::FilePath::StringType suggested_extension =
1270       ExtensionForMimeType(contents_mime_type);
1271   std::string mime_type;
1272   if (!suggested_extension.empty() &&
1273       !net::GetMimeTypeFromExtension(ext, &mime_type)) {
1274     // Extension is absent or needs to be updated.
1275     return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
1276                     suggested_extension);
1277   }
1278   return name;
1279 }
1280
1281 const base::FilePath::CharType* SavePackage::ExtensionForMimeType(
1282     const std::string& contents_mime_type) {
1283   static const struct {
1284     const base::FilePath::CharType *mime_type;
1285     const base::FilePath::CharType *suggested_extension;
1286   } extensions[] = {
1287     { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension },
1288     { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") },
1289     { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") },
1290     { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") },
1291     { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") },
1292   };
1293 #if defined(OS_POSIX)
1294   base::FilePath::StringType mime_type(contents_mime_type);
1295 #elif defined(OS_WIN)
1296   base::FilePath::StringType mime_type(base::UTF8ToWide(contents_mime_type));
1297 #endif  // OS_WIN
1298   for (uint32 i = 0; i < arraysize(extensions); ++i) {
1299     if (mime_type == extensions[i].mime_type)
1300       return extensions[i].suggested_extension;
1301   }
1302   return FILE_PATH_LITERAL("");
1303 }
1304
1305 void SavePackage::GetSaveInfo() {
1306   // Can't use web_contents_ in the file thread, so get the data that we need
1307   // before calling to it.
1308   base::FilePath website_save_dir, download_save_dir;
1309   bool skip_dir_check = false;
1310   DCHECK(download_manager_);
1311   if (download_manager_->GetDelegate()) {
1312     download_manager_->GetDelegate()->GetSaveDir(
1313         web_contents()->GetBrowserContext(), &website_save_dir,
1314         &download_save_dir, &skip_dir_check);
1315   }
1316   std::string mime_type = web_contents()->GetContentsMimeType();
1317   std::string accept_languages =
1318       GetContentClient()->browser()->GetAcceptLangs(
1319           web_contents()->GetBrowserContext());
1320
1321   BrowserThread::PostTask(
1322       BrowserThread::FILE, FROM_HERE,
1323       base::Bind(&SavePackage::CreateDirectoryOnFileThread, this,
1324           website_save_dir, download_save_dir, skip_dir_check,
1325           mime_type, accept_languages));
1326 }
1327
1328 void SavePackage::CreateDirectoryOnFileThread(
1329     const base::FilePath& website_save_dir,
1330     const base::FilePath& download_save_dir,
1331     bool skip_dir_check,
1332     const std::string& mime_type,
1333     const std::string& accept_langs) {
1334   base::FilePath save_dir;
1335   // If the default html/websites save folder doesn't exist...
1336   // We skip the directory check for gdata directories on ChromeOS.
1337   if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) {
1338     // If the default download dir doesn't exist, create it.
1339     if (!base::DirectoryExists(download_save_dir)) {
1340       bool res = base::CreateDirectory(download_save_dir);
1341       DCHECK(res);
1342     }
1343     save_dir = download_save_dir;
1344   } else {
1345     // If it does exist, use the default save dir param.
1346     save_dir = website_save_dir;
1347   }
1348
1349   bool can_save_as_complete = CanSaveAsComplete(mime_type);
1350   base::FilePath suggested_filename = GetSuggestedNameForSaveAs(
1351       can_save_as_complete, mime_type, accept_langs);
1352   base::FilePath::StringType pure_file_name =
1353       suggested_filename.RemoveExtension().BaseName().value();
1354   base::FilePath::StringType file_name_ext = suggested_filename.Extension();
1355
1356   // Need to make sure the suggested file name is not too long.
1357   uint32 max_path = GetMaxPathLengthForDirectory(save_dir);
1358
1359   if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) {
1360     save_dir = save_dir.Append(pure_file_name + file_name_ext);
1361   } else {
1362     // Cannot create a shorter filename. This will cause the save as operation
1363     // to fail unless the user pick a shorter name. Continuing even though it
1364     // will fail because returning means no save as popup for the user, which
1365     // is even more confusing. This case should be rare though.
1366     save_dir = save_dir.Append(suggested_filename);
1367   }
1368
1369   BrowserThread::PostTask(
1370       BrowserThread::UI, FROM_HERE,
1371       base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir,
1372                  can_save_as_complete));
1373 }
1374
1375 void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path,
1376                                       bool can_save_as_complete) {
1377
1378   // The WebContents which owns this SavePackage may have disappeared during
1379   // the UI->FILE->UI thread hop of
1380   // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo.
1381   if (!web_contents() || !download_manager_->GetDelegate())
1382     return;
1383
1384   base::FilePath::StringType default_extension;
1385   if (can_save_as_complete)
1386     default_extension = kDefaultHtmlExtension;
1387
1388   download_manager_->GetDelegate()->ChooseSavePath(
1389       web_contents(),
1390       suggested_path,
1391       default_extension,
1392       can_save_as_complete,
1393       base::Bind(&SavePackage::OnPathPicked, AsWeakPtr()));
1394 }
1395
1396 void SavePackage::OnPathPicked(
1397     const base::FilePath& final_name,
1398     SavePageType type,
1399     const SavePackageDownloadCreatedCallback& download_created_callback) {
1400   DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
1401          (type == SAVE_PAGE_TYPE_AS_MHTML) ||
1402          (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type;
1403   // Ensure the filename is safe.
1404   saved_main_file_path_ = final_name;
1405   // TODO(asanka): This call may block on IO and shouldn't be made
1406   // from the UI thread.  See http://crbug.com/61827.
1407   net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false,
1408                             &saved_main_file_path_);
1409
1410   saved_main_directory_path_ = saved_main_file_path_.DirName();
1411   save_type_ = type;
1412   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
1413     // Make new directory for saving complete file.
1414     saved_main_directory_path_ = saved_main_directory_path_.Append(
1415         saved_main_file_path_.RemoveExtension().BaseName().value() +
1416         FILE_PATH_LITERAL("_files"));
1417   }
1418
1419   Init(download_created_callback);
1420 }
1421
1422 void SavePackage::StopObservation() {
1423   DCHECK(download_);
1424   DCHECK(download_manager_);
1425
1426   download_->RemoveObserver(this);
1427   download_ = NULL;
1428   download_manager_ = NULL;
1429 }
1430
1431 void SavePackage::OnDownloadDestroyed(DownloadItem* download) {
1432   StopObservation();
1433 }
1434
1435 void SavePackage::FinalizeDownloadEntry() {
1436   DCHECK(download_);
1437   DCHECK(download_manager_);
1438
1439   download_manager_->OnSavePackageSuccessfullyFinished(download_);
1440   StopObservation();
1441 }
1442
1443 }  // namespace content