- add sources.
[platform/framework/web/crosswalk.git] / src / chrome / browser / spellchecker / spellcheck_custom_dictionary.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
6
7 #include <functional>
8
9 #include "base/file_util.h"
10 #include "base/files/important_file_writer.h"
11 #include "base/md5.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
15 #include "chrome/common/chrome_constants.h"
16 #include "chrome/common/spellcheck_messages.h"
17 #include "content/public/browser/browser_thread.h"
18 #include "sync/api/sync_change.h"
19 #include "sync/api/sync_data.h"
20 #include "sync/api/sync_error_factory.h"
21 #include "sync/protocol/sync.pb.h"
22
23 using content::BrowserThread;
24 using chrome::spellcheck_common::WordList;
25 using chrome::spellcheck_common::WordSet;
26
27 namespace {
28
29 // Filename extension for backup dictionary file.
30 const base::FilePath::CharType BACKUP_EXTENSION[] = FILE_PATH_LITERAL("backup");
31
// Marker that introduces the checksum trailer in the dictionary file. The
// checksum text follows this marker directly.
const char CHECKSUM_PREFIX[] = "checksum_v1 = ";
34
// Outcome of verifying the checksum of a custom spellcheck dictionary file.
enum ChecksumStatus {
  // The stored checksum matched (or the file carried no checksum).
  VALID_CHECKSUM = 0,

  // The stored checksum did not match the file contents.
  INVALID_CHECKSUM = 1,
};
40
// Flags describing what a sanitation pass found in a dictionary change. Each
// non-zero value occupies its own bit so results can be OR-ed together.
enum ChangeSanitationResult {
  // Nothing had to be dropped; the change can be applied as-is.
  VALID_CHANGE = 0,

  // Some of the words to be added were not valid.
  DETECTED_INVALID_WORDS = 1 << 0,

  // Some of the words to be added were already in the dictionary.
  DETECTED_DUPLICATE_WORDS = 1 << 1,

  // Some of the words to be removed were not in the dictionary.
  DETECTED_MISSING_WORDS = 1 << 2,
};
55
56 // Loads the file at |file_path| into the |words| container. If the file has a
57 // valid checksum, then returns ChecksumStatus::VALID. If the file has an
58 // invalid checksum, then returns ChecksumStatus::INVALID and clears |words|.
59 ChecksumStatus LoadFile(const base::FilePath& file_path, WordList& words) {
60   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
61   words.clear();
62   std::string contents;
63   base::ReadFileToString(file_path, &contents);
64   size_t pos = contents.rfind(CHECKSUM_PREFIX);
65   if (pos != std::string::npos) {
66     std::string checksum = contents.substr(pos + strlen(CHECKSUM_PREFIX));
67     contents = contents.substr(0, pos);
68     if (checksum != base::MD5String(contents))
69       return INVALID_CHECKSUM;
70   }
71   TrimWhitespaceASCII(contents, TRIM_ALL, &contents);
72   base::SplitString(contents, '\n', &words);
73   return VALID_CHECKSUM;
74 }
75
76 // Returns true for invalid words and false for valid words.
77 bool IsInvalidWord(const std::string& word) {
78   std::string tmp;
79   return !IsStringUTF8(word) ||
80       word.length() >
81           chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES ||
82       word.empty() ||
83       TRIM_NONE != TrimWhitespaceASCII(word, TRIM_ALL, &tmp);
84 }
85
86 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If
87 // the dictionary checksum is not valid, but backup checksum is valid, then
88 // restores the backup and loads that into |custom_words| instead. If the backup
89 // is invalid too, then clears |custom_words|. Must be called on the file
90 // thread.
91 void LoadDictionaryFileReliably(WordList& custom_words,
92                                 const base::FilePath& path) {
93   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
94   // Load the contents and verify the checksum.
95   if (LoadFile(path, custom_words) == VALID_CHECKSUM)
96     return;
97   // Checksum is not valid. See if there's a backup.
98   base::FilePath backup = path.AddExtension(BACKUP_EXTENSION);
99   if (!base::PathExists(backup))
100     return;
101   // Load the backup and verify its checksum.
102   if (LoadFile(backup, custom_words) != VALID_CHECKSUM)
103     return;
104   // Backup checksum is valid. Restore the backup.
105   base::CopyFile(backup, path);
106 }
107
108 // Backs up the original dictionary, saves |custom_words| and its checksum into
109 // the custom spellcheck dictionary at |path|.
110 void SaveDictionaryFileReliably(
111     const WordList& custom_words,
112     const base::FilePath& path) {
113   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
114   std::stringstream content;
115   for (WordList::const_iterator it = custom_words.begin();
116        it != custom_words.end();
117        ++it) {
118     content << *it << '\n';
119   }
120   std::string checksum = base::MD5String(content.str());
121   content << CHECKSUM_PREFIX << checksum;
122   base::CopyFile(path, path.AddExtension(BACKUP_EXTENSION));
123   base::ImportantFileWriter::WriteFileAtomically(path, content.str());
124 }
125
126 // Removes duplicate and invalid words from |to_add| word list and sorts it.
127 // Looks for duplicates in both |to_add| and |existing| word lists. Returns a
128 // bitmap of |ChangeSanitationResult| values.
129 int SanitizeWordsToAdd(const WordSet& existing, WordList& to_add) {
130   // Do not add duplicate words.
131   std::sort(to_add.begin(), to_add.end());
132   WordList new_words = base::STLSetDifference<WordList>(to_add, existing);
133   new_words.erase(std::unique(new_words.begin(), new_words.end()),
134                   new_words.end());
135   int result = VALID_CHANGE;
136   if (to_add.size() != new_words.size())
137     result |= DETECTED_DUPLICATE_WORDS;
138   // Do not add invalid words.
139   size_t size = new_words.size();
140   new_words.erase(std::remove_if(new_words.begin(),
141                                  new_words.end(),
142                                  IsInvalidWord),
143                   new_words.end());
144   if (size != new_words.size())
145     result |= DETECTED_INVALID_WORDS;
146   // Save the sanitized words to be added.
147   std::swap(to_add, new_words);
148   return result;
149 }
150
151 // Removes word from |to_remove| that are missing from |existing| word list and
152 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
153 int SanitizeWordsToRemove(const WordSet& existing, WordList& to_remove) {
154   // Do not remove words that are missing from the dictionary.
155   std::sort(to_remove.begin(), to_remove.end());
156   WordList found_words;
157   std::set_intersection(existing.begin(),
158                         existing.end(),
159                         to_remove.begin(),
160                         to_remove.end(),
161                         std::back_inserter(found_words));
162   int result = VALID_CHANGE;
163   if (to_remove.size() > found_words.size())
164     result |= DETECTED_MISSING_WORDS;
165   // Save the sanitized words to be removed.
166   std::swap(to_remove, found_words);
167   return result;
168 }
169
170 }  // namespace
171
172
173 SpellcheckCustomDictionary::Change::Change() {
174 }
175
176 SpellcheckCustomDictionary::Change::Change(
177     const SpellcheckCustomDictionary::Change& other)
178     : to_add_(other.to_add()),
179       to_remove_(other.to_remove()) {
180 }
181
182 SpellcheckCustomDictionary::Change::Change(const WordList& to_add)
183     : to_add_(to_add) {
184 }
185
186 SpellcheckCustomDictionary::Change::~Change() {
187 }
188
189 void SpellcheckCustomDictionary::Change::AddWord(const std::string& word) {
190   to_add_.push_back(word);
191 }
192
193 void SpellcheckCustomDictionary::Change::RemoveWord(const std::string& word) {
194   to_remove_.push_back(word);
195 }
196
197 int SpellcheckCustomDictionary::Change::Sanitize(const WordSet& words) {
198   int result = VALID_CHANGE;
199   if (!to_add_.empty())
200     result |= SanitizeWordsToAdd(words, to_add_);
201   if (!to_remove_.empty())
202     result |= SanitizeWordsToRemove(words, to_remove_);
203   return result;
204 }
205
206 const WordList& SpellcheckCustomDictionary::Change::to_add() const {
207   return to_add_;
208 }
209
210 const WordList& SpellcheckCustomDictionary::Change::to_remove() const {
211   return to_remove_;
212 }
213
214 bool SpellcheckCustomDictionary::Change::empty() const {
215   return to_add_.empty() && to_remove_.empty();
216 }
217
218 SpellcheckCustomDictionary::SpellcheckCustomDictionary(
219     const base::FilePath& path)
220     : custom_dictionary_path_(),
221       is_loaded_(false),
222       weak_ptr_factory_(this) {
223   custom_dictionary_path_ =
224       path.Append(chrome::kCustomDictionaryFileName);
225 }
226
227 SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
228 }
229
230 const WordSet& SpellcheckCustomDictionary::GetWords() const {
231   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
232   return words_;
233 }
234
235 bool SpellcheckCustomDictionary::AddWord(const std::string& word) {
236   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
237   Change dictionary_change;
238   dictionary_change.AddWord(word);
239   int result = dictionary_change.Sanitize(GetWords());
240   Apply(dictionary_change);
241   Notify(dictionary_change);
242   Sync(dictionary_change);
243   Save(dictionary_change);
244   return result == VALID_CHANGE;
245 }
246
247 bool SpellcheckCustomDictionary::RemoveWord(const std::string& word) {
248   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
249   Change dictionary_change;
250   dictionary_change.RemoveWord(word);
251   int result = dictionary_change.Sanitize(GetWords());
252   Apply(dictionary_change);
253   Notify(dictionary_change);
254   Sync(dictionary_change);
255   Save(dictionary_change);
256   return result == VALID_CHANGE;
257 }
258
259 bool SpellcheckCustomDictionary::HasWord(const std::string& word) const {
260   return !!words_.count(word);
261 }
262
263 void SpellcheckCustomDictionary::AddObserver(Observer* observer) {
264   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
265   observers_.AddObserver(observer);
266 }
267
268 void SpellcheckCustomDictionary::RemoveObserver(Observer* observer) {
269   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
270   observers_.RemoveObserver(observer);
271 }
272
273 bool SpellcheckCustomDictionary::IsLoaded() {
274   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
275   return is_loaded_;
276 }
277
278 bool SpellcheckCustomDictionary::IsSyncing() {
279   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
280   return !!sync_processor_.get();
281 }
282
283 void SpellcheckCustomDictionary::Load() {
284   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
285   BrowserThread::PostTaskAndReplyWithResult(
286       BrowserThread::FILE,
287       FROM_HERE,
288       base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile,
289                  custom_dictionary_path_),
290       base::Bind(&SpellcheckCustomDictionary::OnLoaded,
291                  weak_ptr_factory_.GetWeakPtr()));
292 }
293
294 syncer::SyncMergeResult SpellcheckCustomDictionary::MergeDataAndStartSyncing(
295     syncer::ModelType type,
296     const syncer::SyncDataList& initial_sync_data,
297     scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
298     scoped_ptr<syncer::SyncErrorFactory> sync_error_handler) {
299   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
300   DCHECK(!sync_processor_.get());
301   DCHECK(!sync_error_handler_.get());
302   DCHECK(sync_processor.get());
303   DCHECK(sync_error_handler.get());
304   DCHECK_EQ(syncer::DICTIONARY, type);
305   sync_processor_ = sync_processor.Pass();
306   sync_error_handler_ = sync_error_handler.Pass();
307
308   // Build a list of words to add locally.
309   WordList to_add_locally;
310   for (syncer::SyncDataList::const_iterator it = initial_sync_data.begin();
311        it != initial_sync_data.end();
312        ++it) {
313     DCHECK_EQ(syncer::DICTIONARY, it->GetDataType());
314     to_add_locally.push_back(it->GetSpecifics().dictionary().word());
315   }
316
317   // Add remote words locally.
318   Change to_change_locally(to_add_locally);
319   to_change_locally.Sanitize(GetWords());
320   Apply(to_change_locally);
321   Notify(to_change_locally);
322   Save(to_change_locally);
323
324   // Add as many as possible local words remotely.
325   std::sort(to_add_locally.begin(), to_add_locally.end());
326   WordList to_add_remotely = base::STLSetDifference<WordList>(words_,
327                                                               to_add_locally);
328
329   // Send local changes to the sync server.
330   Change to_change_remotely(to_add_remotely);
331   syncer::SyncMergeResult result(type);
332   result.set_error(Sync(to_change_remotely));
333   return result;
334 }
335
336 void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type) {
337   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
338   DCHECK_EQ(syncer::DICTIONARY, type);
339   sync_processor_.reset();
340   sync_error_handler_.reset();
341 }
342
343 syncer::SyncDataList SpellcheckCustomDictionary::GetAllSyncData(
344     syncer::ModelType type) const {
345   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
346   DCHECK_EQ(syncer::DICTIONARY, type);
347   syncer::SyncDataList data;
348   std::string word;
349   size_t i = 0;
350   for (WordSet::const_iterator it = words_.begin();
351        it != words_.end() &&
352            i < chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS;
353        ++it, ++i) {
354     word = *it;
355     sync_pb::EntitySpecifics specifics;
356     specifics.mutable_dictionary()->set_word(word);
357     data.push_back(syncer::SyncData::CreateLocalData(word, word, specifics));
358   }
359   return data;
360 }
361
362 syncer::SyncError SpellcheckCustomDictionary::ProcessSyncChanges(
363     const tracked_objects::Location& from_here,
364     const syncer::SyncChangeList& change_list) {
365   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
366   Change dictionary_change;
367   for (syncer::SyncChangeList::const_iterator it = change_list.begin();
368        it != change_list.end();
369        ++it) {
370     DCHECK(it->IsValid());
371     std::string word = it->sync_data().GetSpecifics().dictionary().word();
372     switch (it->change_type()) {
373       case syncer::SyncChange::ACTION_ADD:
374         dictionary_change.AddWord(word);
375         break;
376       case syncer::SyncChange::ACTION_DELETE:
377         dictionary_change.RemoveWord(word);
378         break;
379       default:
380         return sync_error_handler_->CreateAndUploadError(
381             FROM_HERE,
382             "Processing sync changes failed on change type " +
383                 syncer::SyncChange::ChangeTypeToString(it->change_type()));
384     }
385   }
386
387   dictionary_change.Sanitize(GetWords());
388   Apply(dictionary_change);
389   Notify(dictionary_change);
390   Save(dictionary_change);
391
392   return syncer::SyncError();
393 }
394
395 // static
396 WordList SpellcheckCustomDictionary::LoadDictionaryFile(
397     const base::FilePath& path) {
398   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
399   WordList words;
400   LoadDictionaryFileReliably(words, path);
401   if (!words.empty() && VALID_CHANGE != SanitizeWordsToAdd(WordSet(), words))
402     SaveDictionaryFileReliably(words, path);
403   SpellCheckHostMetrics::RecordCustomWordCountStats(words.size());
404   return words;
405 }
406
407 // static
408 void SpellcheckCustomDictionary::UpdateDictionaryFile(
409     const SpellcheckCustomDictionary::Change& dictionary_change,
410     const base::FilePath& path) {
411   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
412   if (dictionary_change.empty())
413     return;
414
415   WordList custom_words;
416   LoadDictionaryFileReliably(custom_words, path);
417
418   // Add words.
419   custom_words.insert(custom_words.end(),
420                       dictionary_change.to_add().begin(),
421                       dictionary_change.to_add().end());
422
423   // Remove words.
424   std::sort(custom_words.begin(), custom_words.end());
425   WordList remaining =
426       base::STLSetDifference<WordList>(custom_words,
427                                        dictionary_change.to_remove());
428   std::swap(custom_words, remaining);
429
430   SaveDictionaryFileReliably(custom_words, path);
431 }
432
433 void SpellcheckCustomDictionary::OnLoaded(WordList custom_words) {
434   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
435   Change dictionary_change(custom_words);
436   dictionary_change.Sanitize(GetWords());
437   Apply(dictionary_change);
438   Sync(dictionary_change);
439   is_loaded_ = true;
440   FOR_EACH_OBSERVER(Observer, observers_, OnCustomDictionaryLoaded());
441 }
442
443 void SpellcheckCustomDictionary::Apply(
444     const SpellcheckCustomDictionary::Change& dictionary_change) {
445   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
446   if (!dictionary_change.to_add().empty()) {
447     words_.insert(dictionary_change.to_add().begin(),
448                   dictionary_change.to_add().end());
449   }
450   if (!dictionary_change.to_remove().empty()) {
451     WordSet updated_words =
452         base::STLSetDifference<WordSet>(words_,
453                                         dictionary_change.to_remove());
454     std::swap(words_, updated_words);
455   }
456 }
457
458 void SpellcheckCustomDictionary::Save(
459     const SpellcheckCustomDictionary::Change& dictionary_change) {
460   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
461   BrowserThread::PostTask(
462       BrowserThread::FILE,
463       FROM_HERE,
464       base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile,
465                  dictionary_change,
466                  custom_dictionary_path_));
467 }
468
469 syncer::SyncError SpellcheckCustomDictionary::Sync(
470     const SpellcheckCustomDictionary::Change& dictionary_change) {
471   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
472   syncer::SyncError error;
473   if (!IsSyncing() || dictionary_change.empty())
474     return error;
475
476   // The number of words on the sync server should not exceed the limits.
477   int server_size = static_cast<int>(words_.size()) -
478       static_cast<int>(dictionary_change.to_add().size());
479   int max_upload_size = std::max(
480       0,
481       static_cast<int>(
482           chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) -
483           server_size);
484   int upload_size = std::min(
485       static_cast<int>(dictionary_change.to_add().size()),
486       max_upload_size);
487
488   syncer::SyncChangeList sync_change_list;
489   int i = 0;
490
491   for (WordList::const_iterator it = dictionary_change.to_add().begin();
492        it != dictionary_change.to_add().end() && i < upload_size;
493        ++it, ++i) {
494     std::string word = *it;
495     sync_pb::EntitySpecifics specifics;
496     specifics.mutable_dictionary()->set_word(word);
497     sync_change_list.push_back(syncer::SyncChange(
498         FROM_HERE,
499         syncer::SyncChange::ACTION_ADD,
500         syncer::SyncData::CreateLocalData(word, word, specifics)));
501   }
502
503   for (WordList::const_iterator it = dictionary_change.to_remove().begin();
504        it != dictionary_change.to_remove().end();
505        ++it) {
506     std::string word = *it;
507     sync_pb::EntitySpecifics specifics;
508     specifics.mutable_dictionary()->set_word(word);
509     sync_change_list.push_back(syncer::SyncChange(
510         FROM_HERE,
511         syncer::SyncChange::ACTION_DELETE,
512         syncer::SyncData::CreateLocalData(word, word, specifics)));
513   }
514
515   // Send the changes to the sync processor.
516   error = sync_processor_->ProcessSyncChanges(FROM_HERE, sync_change_list);
517   if (error.IsSet())
518     return error;
519
520   // Turn off syncing of this dictionary if the server already has the maximum
521   // number of words.
522   if (words_.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS)
523     StopSyncing(syncer::DICTIONARY);
524
525   return error;
526 }
527
528 void SpellcheckCustomDictionary::Notify(
529     const SpellcheckCustomDictionary::Change& dictionary_change) {
530   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
531   if (!IsLoaded() || dictionary_change.empty())
532     return;
533   FOR_EACH_OBSERVER(Observer,
534                     observers_,
535                     OnCustomDictionaryChanged(dictionary_change));
536 }